{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark import SparkContext\n",
    "from pyspark import SparkConf\n",
    "#sc = SparkContext()\n",
    "import pyspark\n",
    "from pyspark import SparkConf, SparkContext \n",
    "import pandas as pd \n",
    "from pyspark.sql import functions as f\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib import rcParams\n",
    "from pyspark.sql.functions import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.sql import SQLContext\n",
    "from pyspark.sql.session import SparkSession\n",
    "\n",
    "# Memory settings for the executor and driver, plus the cap on results collected to the driver.\n",
    "# They are only applied when this cell actually creates the context; an existing context is reused as-is.\n",
    "spark_conf = SparkConf().set('spark.executor.memory', '6G').set('spark.driver.memory', '6G').set('spark.driver.maxResultSize', '6G')\n",
    "\n",
    "# getOrCreate() rather than SparkContext(conf=...): constructing a second context in the same kernel\n",
    "# raises ValueError, so the cell could previously not be re-run without restarting the kernel.\n",
    "sc = SparkContext.getOrCreate(conf=spark_conf)\n",
    "\n",
    "# Kept for any later cell that still uses the legacy SQLContext entry point.\n",
    "sqlContext = SQLContext(sc)\n",
    "\n",
    "# Session bound to the context above (reuses an already-active session if there is one).\n",
    "spark = SparkSession.builder.getOrCreate()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Read Accident data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Location of the accident dataset. Defaults to the original author's local path; set the\n",
    "# ACCIDENT_PARQUET_PATH environment variable to run the notebook against a copy stored elsewhere.\n",
    "ACCIDENT_PARQUET_PATH = os.environ.get(\n",
    "    \"ACCIDENT_PARQUET_PATH\",\n",
    "    \"/Users/pprusty05/workspace/Data_mining/Project/data_folder/accident_data/Accident.parquet\",\n",
    ")\n",
    "\n",
    "df = spark.read.parquet(ACCIDENT_PARQUET_PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect only the first 10 rows to the driver as a pandas DataFrame for a quick preview.\n",
    "df_pd=df.limit(10).toPandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>Source</th>\n",
       "      <th>TMC</th>\n",
       "      <th>Severity</th>\n",
       "      <th>Start_Time</th>\n",
       "      <th>End_Time</th>\n",
       "      <th>Start_Lat</th>\n",
       "      <th>Start_Lng</th>\n",
       "      <th>End_Lat</th>\n",
       "      <th>End_Lng</th>\n",
       "      <th>...</th>\n",
       "      <th>Roundabout</th>\n",
       "      <th>Station</th>\n",
       "      <th>Stop</th>\n",
       "      <th>Traffic_Calming</th>\n",
       "      <th>Traffic_Signal</th>\n",
       "      <th>Turning_Loop</th>\n",
       "      <th>Sunrise_Sunset</th>\n",
       "      <th>Civil_Twilight</th>\n",
       "      <th>Nautical_Twilight</th>\n",
       "      <th>Astronomical_Twilight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>A-2476200</td>\n",
       "      <td>Bing</td>\n",
       "      <td>None</td>\n",
       "      <td>4</td>\n",
       "      <td>2017-07-12 17:49:09</td>\n",
       "      <td>2017-07-12 23:49:09</td>\n",
       "      <td>33.769969</td>\n",
       "      <td>-84.519786</td>\n",
       "      <td>33.770300</td>\n",
       "      <td>-84.519180</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>A-2476201</td>\n",
       "      <td>Bing</td>\n",
       "      <td>None</td>\n",
       "      <td>2</td>\n",
       "      <td>2017-07-12 16:55:07</td>\n",
       "      <td>2017-07-12 22:55:07</td>\n",
       "      <td>42.985070</td>\n",
       "      <td>-87.915350</td>\n",
       "      <td>42.977940</td>\n",
       "      <td>-87.916110</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>A-2476202</td>\n",
       "      <td>Bing</td>\n",
       "      <td>None</td>\n",
       "      <td>3</td>\n",
       "      <td>2017-07-12 16:52:48</td>\n",
       "      <td>2017-07-12 22:52:48</td>\n",
       "      <td>36.121430</td>\n",
       "      <td>-86.756330</td>\n",
       "      <td>36.123690</td>\n",
       "      <td>-86.747710</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>A-2476203</td>\n",
       "      <td>Bing</td>\n",
       "      <td>None</td>\n",
       "      <td>3</td>\n",
       "      <td>2017-07-12 16:55:07</td>\n",
       "      <td>2017-07-12 22:55:07</td>\n",
       "      <td>41.761268</td>\n",
       "      <td>-87.945109</td>\n",
       "      <td>41.761295</td>\n",
       "      <td>-87.945109</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>A-2476204</td>\n",
       "      <td>Bing</td>\n",
       "      <td>None</td>\n",
       "      <td>2</td>\n",
       "      <td>2017-07-12 17:57:14</td>\n",
       "      <td>2017-07-12 23:57:14</td>\n",
       "      <td>43.703396</td>\n",
       "      <td>-70.313522</td>\n",
       "      <td>43.692033</td>\n",
       "      <td>-70.320149</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>A-2476205</td>\n",
       "      <td>Bing</td>\n",
       "      <td>None</td>\n",
       "      <td>3</td>\n",
       "      <td>2017-07-12 16:59:29</td>\n",
       "      <td>2017-07-12 22:59:29</td>\n",
       "      <td>35.979230</td>\n",
       "      <td>-86.575250</td>\n",
       "      <td>35.971110</td>\n",
       "      <td>-86.568860</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>A-2476206</td>\n",
       "      <td>Bing</td>\n",
       "      <td>None</td>\n",
       "      <td>3</td>\n",
       "      <td>2017-07-12 18:05:29</td>\n",
       "      <td>2017-07-13 00:05:29</td>\n",
       "      <td>40.898080</td>\n",
       "      <td>-74.247540</td>\n",
       "      <td>40.896300</td>\n",
       "      <td>-74.254890</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>A-2476207</td>\n",
       "      <td>Bing</td>\n",
       "      <td>None</td>\n",
       "      <td>2</td>\n",
       "      <td>2017-07-12 18:11:29</td>\n",
       "      <td>2017-07-13 00:11:29</td>\n",
       "      <td>40.819015</td>\n",
       "      <td>-73.934388</td>\n",
       "      <td>40.819015</td>\n",
       "      <td>-73.934388</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>A-2476208</td>\n",
       "      <td>Bing</td>\n",
       "      <td>None</td>\n",
       "      <td>3</td>\n",
       "      <td>2017-07-12 18:13:48</td>\n",
       "      <td>2017-07-13 00:13:48</td>\n",
       "      <td>35.015940</td>\n",
       "      <td>-85.278270</td>\n",
       "      <td>35.017230</td>\n",
       "      <td>-85.282640</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>A-2476209</td>\n",
       "      <td>Bing</td>\n",
       "      <td>None</td>\n",
       "      <td>2</td>\n",
       "      <td>2017-07-12 18:27:19</td>\n",
       "      <td>2017-07-13 00:27:19</td>\n",
       "      <td>39.311562</td>\n",
       "      <td>-76.952013</td>\n",
       "      <td>39.310469</td>\n",
       "      <td>-76.952345</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "      <td>Day</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10 rows × 49 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          ID Source   TMC  Severity          Start_Time            End_Time  \\\n",
       "0  A-2476200   Bing  None         4 2017-07-12 17:49:09 2017-07-12 23:49:09   \n",
       "1  A-2476201   Bing  None         2 2017-07-12 16:55:07 2017-07-12 22:55:07   \n",
       "2  A-2476202   Bing  None         3 2017-07-12 16:52:48 2017-07-12 22:52:48   \n",
       "3  A-2476203   Bing  None         3 2017-07-12 16:55:07 2017-07-12 22:55:07   \n",
       "4  A-2476204   Bing  None         2 2017-07-12 17:57:14 2017-07-12 23:57:14   \n",
       "5  A-2476205   Bing  None         3 2017-07-12 16:59:29 2017-07-12 22:59:29   \n",
       "6  A-2476206   Bing  None         3 2017-07-12 18:05:29 2017-07-13 00:05:29   \n",
       "7  A-2476207   Bing  None         2 2017-07-12 18:11:29 2017-07-13 00:11:29   \n",
       "8  A-2476208   Bing  None         3 2017-07-12 18:13:48 2017-07-13 00:13:48   \n",
       "9  A-2476209   Bing  None         2 2017-07-12 18:27:19 2017-07-13 00:27:19   \n",
       "\n",
       "   Start_Lat  Start_Lng    End_Lat    End_Lng  ...  Roundabout Station   Stop  \\\n",
       "0  33.769969 -84.519786  33.770300 -84.519180  ...       False   False  False   \n",
       "1  42.985070 -87.915350  42.977940 -87.916110  ...       False   False  False   \n",
       "2  36.121430 -86.756330  36.123690 -86.747710  ...       False   False  False   \n",
       "3  41.761268 -87.945109  41.761295 -87.945109  ...       False   False  False   \n",
       "4  43.703396 -70.313522  43.692033 -70.320149  ...       False   False  False   \n",
       "5  35.979230 -86.575250  35.971110 -86.568860  ...       False   False  False   \n",
       "6  40.898080 -74.247540  40.896300 -74.254890  ...       False   False  False   \n",
       "7  40.819015 -73.934388  40.819015 -73.934388  ...       False   False  False   \n",
       "8  35.015940 -85.278270  35.017230 -85.282640  ...       False   False  False   \n",
       "9  39.311562 -76.952013  39.310469 -76.952345  ...       False   False  False   \n",
       "\n",
       "  Traffic_Calming Traffic_Signal Turning_Loop Sunrise_Sunset Civil_Twilight  \\\n",
       "0           False           True        False            Day            Day   \n",
       "1           False          False        False            Day            Day   \n",
       "2           False          False        False            Day            Day   \n",
       "3           False          False        False            Day            Day   \n",
       "4           False          False        False            Day            Day   \n",
       "5           False          False        False            Day            Day   \n",
       "6           False          False        False            Day            Day   \n",
       "7           False          False        False            Day            Day   \n",
       "8           False          False        False            Day            Day   \n",
       "9           False          False        False            Day            Day   \n",
       "\n",
       "  Nautical_Twilight Astronomical_Twilight  \n",
       "0               Day                   Day  \n",
       "1               Day                   Day  \n",
       "2               Day                   Day  \n",
       "3               Day                   Day  \n",
       "4               Day                   Day  \n",
       "5               Day                   Day  \n",
       "6               Day                   Day  \n",
       "7               Day                   Day  \n",
       "8               Day                   Day  \n",
       "9               Day                   Day  \n",
       "\n",
       "[10 rows x 49 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['ID',\n",
       " 'Source',\n",
       " 'TMC',\n",
       " 'Severity',\n",
       " 'Start_Time',\n",
       " 'End_Time',\n",
       " 'Start_Lat',\n",
       " 'Start_Lng',\n",
       " 'End_Lat',\n",
       " 'End_Lng',\n",
       " 'Distance',\n",
       " 'Description',\n",
       " 'Number',\n",
       " 'Street',\n",
       " 'Side',\n",
       " 'City',\n",
       " 'County',\n",
       " 'State',\n",
       " 'Zipcode',\n",
       " 'Country',\n",
       " 'Timezone',\n",
       " 'Airport_Code',\n",
       " 'Weather_Timestamp',\n",
       " 'Temperature',\n",
       " 'Wind_Chill',\n",
       " 'Humidity',\n",
       " 'Pressure',\n",
       " 'Visibility',\n",
       " 'Wind_Direction',\n",
       " 'Wind_Speed',\n",
       " 'Precipitation',\n",
       " 'Weather_Condition',\n",
       " 'Amenity',\n",
       " 'Bump',\n",
       " 'Crossing',\n",
       " 'Give_Way',\n",
       " 'Junction',\n",
       " 'No_Exit',\n",
       " 'Railway',\n",
       " 'Roundabout',\n",
       " 'Station',\n",
       " 'Stop',\n",
       " 'Traffic_Calming',\n",
       " 'Traffic_Signal',\n",
       " 'Turning_Loop',\n",
       " 'Sunrise_Sunset',\n",
       " 'Civil_Twilight',\n",
       " 'Nautical_Twilight',\n",
       " 'Astronomical_Twilight']"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Visualization"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Temperature vs. accident count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-----------+-----+\n",
      "|Temperature|count|\n",
      "+-----------+-----+\n",
      "|       69.8|15067|\n",
      "|       74.5|  503|\n",
      "|       64.2|  333|\n",
      "|       49.8|  256|\n",
      "|       56.8|  418|\n",
      "|       14.9|   84|\n",
      "|      -12.3|    4|\n",
      "|       15.4|   90|\n",
      "|       47.5|  380|\n",
      "|      107.8|    5|\n",
      "|       72.3|  361|\n",
      "|       70.0|52212|\n",
      "|       96.8| 1604|\n",
      "|       67.0|14092|\n",
      "|        8.0|  128|\n",
      "|       45.3|  287|\n",
      "|       10.2|   40|\n",
      "|       55.8|  440|\n",
      "|       44.8|  386|\n",
      "|       56.5|  429|\n",
      "+-----------+-----+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Number of accident records per distinct Temperature reading (show() prints only the first 20 groups).\n",
    "df.groupby('Temperature').count().show()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Binning "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bucket Temperature into 10-degree bins (lower bound inclusive, upper bound exclusive) with\n",
    "# open-ended '<0' and '>100' bins. Note that the '>100' bin also contains exactly 100, and that\n",
    "# rows matching none of the conditions are labelled 'NA'.\n",
    "temp_col = f.col('Temperature')\n",
    "\n",
    "range_label = f.when(temp_col < 0, \"<0\")\n",
    "for bin_start in range(0, 100, 10):\n",
    "    bin_end = bin_start + 10\n",
    "    range_label = range_label.when((temp_col >= bin_start) & (temp_col < bin_end),\n",
    "                                   \"{}-{}\".format(bin_start, bin_end))\n",
    "range_label = range_label.when(temp_col >= 100, \">100\").otherwise(\"NA\")\n",
    "\n",
    "df_with_temp_range = df.withColumn('TemperatureRange', range_label)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Convert to pandas df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Aggregate in Spark first so that only one row per TemperatureRange bin is brought into pandas.\n",
    "df_temp_range_count = df_with_temp_range.groupby('TemperatureRange').count().toPandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>TemperatureRange</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>&gt;100</td>\n",
       "      <td>13221</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>40-50</td>\n",
       "      <td>292225</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>NA</td>\n",
       "      <td>56063</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>20-30</td>\n",
       "      <td>99794</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>80-90</td>\n",
       "      <td>411734</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>60-70</td>\n",
       "      <td>585878</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>&lt;0</td>\n",
       "      <td>5061</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>50-60</td>\n",
       "      <td>483909</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>0-10</td>\n",
       "      <td>13039</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>30-40</td>\n",
       "      <td>239476</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>10-20</td>\n",
       "      <td>42485</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>70-80</td>\n",
       "      <td>610008</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>90-100</td>\n",
       "      <td>121442</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   TemperatureRange   count\n",
       "0              >100   13221\n",
       "1             40-50  292225\n",
       "2                NA   56063\n",
       "3             20-30   99794\n",
       "4             80-90  411734\n",
       "5             60-70  585878\n",
       "6                <0    5061\n",
       "7             50-60  483909\n",
       "8              0-10   13039\n",
       "9             30-40  239476\n",
       "10            10-20   42485\n",
       "11            70-80  610008\n",
       "12           90-100  121442"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_temp_range_count"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Drop the row for missing temperatures (NA)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove the bucket for missing temperatures by its label rather than by row index: Spark does not\n",
    "# guarantee the order of groupBy results, so the 'NA' row is not always the one at index 2.\n",
    "df_temp_acc = df_temp_range_count[df_temp_range_count['TemperatureRange'] != 'NA']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Sort by temperature range"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plain string sort: digits order before '<' and '>', so the '<0' and '>100' bins end up last.\n",
    "df_sorted_temp = df_temp_acc.sort_values('TemperatureRange')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>TemperatureRange</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>0-10</td>\n",
       "      <td>13039</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>10-20</td>\n",
       "      <td>42485</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>20-30</td>\n",
       "      <td>99794</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>30-40</td>\n",
       "      <td>239476</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>40-50</td>\n",
       "      <td>292225</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>50-60</td>\n",
       "      <td>483909</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>60-70</td>\n",
       "      <td>585878</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>70-80</td>\n",
       "      <td>610008</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>80-90</td>\n",
       "      <td>411734</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>90-100</td>\n",
       "      <td>121442</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>&lt;0</td>\n",
       "      <td>5061</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>&gt;100</td>\n",
       "      <td>13221</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   TemperatureRange   count\n",
       "8              0-10   13039\n",
       "10            10-20   42485\n",
       "3             20-30   99794\n",
       "9             30-40  239476\n",
       "1             40-50  292225\n",
       "7             50-60  483909\n",
       "5             60-70  585878\n",
       "11            70-80  610008\n",
       "4             80-90  411734\n",
       "12           90-100  121442\n",
       "6                <0    5061\n",
       "0              >100   13221"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_sorted_temp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Order the bins from coldest to hottest by label. Moving positional row 10 to the front (the previous\n",
    "# approach) is only correct when all twelve bins are present and '<0' happens to sort into slot 10.\n",
    "temp_range_order = ['<0', '0-10', '10-20', '20-30', '30-40', '40-50', '50-60',\n",
    "                    '60-70', '70-80', '80-90', '90-100', '>100']\n",
    "bin_position = {label: pos for pos, label in enumerate(temp_range_order)}\n",
    "\n",
    "# Positional row order of df_sorted_temp by bin; labels missing from the list map to NaN and sort last.\n",
    "idx = df_sorted_temp['TemperatureRange'].map(bin_position).to_numpy().argsort(kind='stable')\n",
    "\n",
    "final_temp_acc = df_sorted_temp.iloc[idx]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>TemperatureRange</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>&lt;0</td>\n",
       "      <td>5061</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>0-10</td>\n",
       "      <td>13039</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>10-20</td>\n",
       "      <td>42485</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>20-30</td>\n",
       "      <td>99794</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>30-40</td>\n",
       "      <td>239476</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>40-50</td>\n",
       "      <td>292225</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>50-60</td>\n",
       "      <td>483909</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>60-70</td>\n",
       "      <td>585878</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>70-80</td>\n",
       "      <td>610008</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>80-90</td>\n",
       "      <td>411734</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>90-100</td>\n",
       "      <td>121442</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>&gt;100</td>\n",
       "      <td>13221</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   TemperatureRange   count\n",
       "6                <0    5061\n",
       "8              0-10   13039\n",
       "10            10-20   42485\n",
       "3             20-30   99794\n",
       "9             30-40  239476\n",
       "1             40-50  292225\n",
       "7             50-60  483909\n",
       "5             60-70  585878\n",
       "11            70-80  610008\n",
       "4             80-90  411734\n",
       "12           90-100  121442\n",
       "0              >100   13221"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "final_temp_acc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3deZwdVZ338c83C4QdAjFCQggCKqAQMAIqSiAsQRzDzLCICAkCmRkR9Hl0NI48Awpo0FEWFxgGUDbNAMoQBjBEFh0YAgTZZJOACekQAiQh7EvI7/njnE4ql9vdN0nXvdXp7/v1qldXnVp+p+p2319X1ak6igjMzMyqpk+rK2BmZlaPE5SZmVWSE5SZmVWSE5SZmVWSE5SZmVWSE5SZmVWSE5SZNY2kYZJekdS3g/mnSrq82fWyanKCssrJX2Dtw1JJrxemj2x1/VaFpFmS9m11PVaFpPXzsb9xdbcVEU9HxPoR8U531K0jkoZLCkn9yoxj5fKHZ5UTEeu3j0uaBRwXEb9vXY06J6lfRCzp6TE68ffAm8B+kt4bEc+2qB7Wy/gMynoMSX0kTZT0pKQFkq6UNDDPa/+P+RhJcyQtkvSPkj4q6UFJL0r6aWFb4yXdIemnkhZLekzS6ML8jSRdJGmepLmSTm+/LFVY9yxJC4BTJW0j6ZZcrxckXSFp47z8ZcAw4Lp8JvINSaMktdXs37KzrHyp62pJl0t6CRjfWZ1qtrNFPuscWCjbJderv6RtJf0h7/cLkv6zi0M/DjgfeBD4Qk2sPSX9bz6+cySNz+XrSPqRpNk5zu25bIUzG0lb57q8LGkasFnN9vcobP8BSaMK826TdFr+LF6WdJOk9vX/mH++mI/5x1Zhv63VIsKDh8oOwCxg3zz+FWA6MBRYG/h34Nd53nAgSF+kA4D9gTeA/wLeAwwBngP2ysuPB5YA/wfoDxwOLAYG5vnX5O2vl9e/G/iHmnVPJF2FWAfYFtgv12sQ6Qvy7Hr7kadHAW2d7OupwNvAwaR/JNfprE51jtstwPGF6R8C5+fxXwPfztsdAOzZyfHfClgK7AB8DXiwZt7LwBH5GG4KjMjzfgbclo97X+Dj+di0f0798nJ3Aj/O8z6Vt3d5njcEWAB8Otd1vzw9KM+/DXgSeH8+PrcBk2p+H/oV6tvwfnuoxtDyCnjw0NlQ86X9KDC6MG/z/CXer/CFNKQwfwFweGH6N8BX8/h44BlAhfl3A0cBg0mXtNYpzDsCuLWw7tNd1Ptg4L56+5GnR9F1gvpjYV6ndaoT/zjgljwuYA7wqTx9KXABMLSB438ycH8eHwK8A+ySp78FXFNnnT7A68DOdeYtSxyks8olwHqF+b9ieYL6JnBZzfpTgXF5/Dbg5MK8LwG/q41TmN/wfnuoxuBLfNaTbAVcky/3vEhKWO+QvrzbzS+Mv15nev3C9NzI31zZbGCLHKc/MK8Q699JZy3t5hQrJmmwpMn50ttLwOXUXK5aBcUYjdSp6DfAxyRtTjozWQr8T573DVLSulvSw5K+2EkdjgauAIiIucAfSJf8ALYkncHU2ox0hlJvXtEWwKKIeLVQNrswvhVwaPv+5n3ek/SPSbvi/bDXWPHzrbUy+20V4EYS1pPMAb4YEXfUzpA0fBW2N0SSCklqGDAlx3kT2Cw6bphQ2w3A93LZhyNioaSDgZ92svyrwLqF+vclXRrsKEYjdVq+YsQiSTeRLl1uD0xu389IjRyOz3H3BH4v6Y8RMbO4DUkfB7YDviXpa7l4A+BDkr6e67RbnfAvkC6vbgM80Ek15wGbSFqvkKSGFfZ7DukM6viu9reOd3XT0Oh+W3X4DMp6kvOBMyRtBSBpkKSxq7G99wAn5YYDh5K+yG+IiHnATcCPJG2o1DhjG0l7dbKtDYBXgMWShgD/XDN/PvC+wvRfgAGSDpLUn3Qpbe2ONr6KdfoV6QzokDwOgKRD
JQ3Nk4tIX+ZL66w/DphGuv80Ig8fIt3vOZB0ZrWvpMMk9ZO0qaQREbEUuBj4cW6w0Tc3Ulhh/yJiNjAD+I6ktXLS+JvCIpcDfyPpgLyNAUqNS4bStefzPi075iux31YRTlDWk5xDOsO5SdLLpAYTu6/G9u4inSG8AJwBHBIRC/K8o4G1gEdIX2ZXs+KlpVrfAXYlNbS4HvhtzfzvAyfnS1Vfj4jFpHsmFwJzSWdUbXRuZes0Je/fsxFRPJP5KHCXpFfyMl+JiKeKK0oaABwG/CQini0MfwUuI90HeprUgOFrwELgfmDnvImvAw8B9+R5Z1L/++bzpM9wIXAK6T4RABExBxgL/Asp4cwhJf4uv7ci4jXSZ3pHPuZ7NLLfVi1a8RK8We+Qm0MfFxF7trouZlafz6DMzKySnKDMzKySfInPzMwqyWdQZmZWSX4Oqo7NNtsshg8f3upqmJn1Cvfee+8LEVH7HKATVD3Dhw9nxowZra6GmVmvIGl2vXJf4jMzs0pygjIzs0pygjIzs0ryPSgzsyZ5++23aWtr44033mh1VVpiwIABDB06lP79+ze0vBOUmVmTtLW1scEGGzB8+HAktbo6TRURLFiwgLa2NrbeeuuG1vElPjOzJnnjjTfYdNNNe11yApDEpptuulJnj05QZmZN1BuTU7uV3femJShJG0u6WtJjkh7N/cMMlDRN0hP55yZ5WUk6V9JMSQ9K2rWwnXF5+SckjSuUf0TSQ3mdc5WPREcxzMys2pp5D+oc4HcRcYiktUi9if4LcHNETJI0EZgIfJPUGdp2edgdOA/YXdJAUp8xI0mdjd0raUpELMrLHE/q4+cGYAxwY95mvRhmtgYaPvH6VVpv1qSDurkmXVvVunakFftQ6+yzz2bChAmsu+66XS/chaacQUnaCPgUcBFARLwVES+SOiO7JC92CXBwHh8LXBrJdGBjSZsDBwDTImJhTkrTgDF53oYRMT13a31pzbbqxTAzs2529tln89prr3XLtpp1iW9rUo+Yv5B0n6QLJa0HDM5dWQM8CwzO40NIvWe2a8tlnZW31SmnkxgrkDRB0gxJM55//vlV2Uczsx7h0ksvZaeddmLnnXfmqKOOYtasWeyzzz7stNNOjB49mqeffhqA8ePHc/XVVy9bb/311wfgtttuY9SoURxyyCF88IMf5MgjjyQiOPfcc3nmmWfYe++92XvvvVe7ns1KUP1I3WGfFxG7kLq3nlhcIJ/5lNr3R2cxIuKCiBgZESMHDXrXOwvNzNYIDz/8MKeffjq33HILDzzwAOeccw4nnngi48aN48EHH+TII4/kpJNO6nI79913H2effTaPPPIITz31FHfccQcnnXQSW2yxBbfeeiu33nrrate1WQmqDWiLiLvy9NWkhDU/X54j/3wuz58LbFlYf2gu66x8aJ1yOolhZtbr3HLLLRx66KFsttlmAAwcOJA777yTz3/+8wAcddRR3H777V1uZ7fddmPo0KH06dOHESNGMGvWrG6va1MSVEQ8C8yR9IFcNBp4BJgCtLfEGwdcm8enAEfn1nx7AIvzZbqpwP6SNsmt8fYHpuZ5L0naI7feO7pmW/VimJlZJ/r168fSpUsBWLp0KW+99dayeWuvvfay8b59+7JkyZJuj9/M56BOBK6Q9CAwAvgeMAnYT9ITwL55GlIrvKeAmcB/AF8CiIiFwGnAPXn4bi4jL3NhXudJUgs+OolhZtbr7LPPPlx11VUsWLAAgIULF/Lxj3+cyZMnA3DFFVfwyU9+EkhdD917770ATJkyhbfffrvL7W+wwQa8/PLL3VLXpjUzj4j7Sc3Da42us2wAJ3SwnYuBi+uUzwA+VKd8Qb0YZla+ntTkuxVasZ877rgj3/72t9lrr73o27cvu+yyCz/5yU845phj+OEPf8igQYP4xS9+AcDxxx/P2LFj2XnnnRkzZgzrrbdel9ufMGECY8aMWXYvanUo5QIrGjlyZLjDQrPV14oEVeWk
+Oijj7L99tuXHqfK6h0DSfdGxLtOYPyqIzMzqyQnKDMzqyQnKDOzJurNt1VWdt+doMzMmmTAgAEsWLCgVyap9v6gBgwY0PA67rDQzKxJhg4dSltbG731dWrtPeo2ygnKzKxJ+vfv33BvsuZLfGZmVlFOUGZmVklOUGZmVklOUGZmVklOUGZmVklOUGZmVklOUGZmVklOUGZmVklOUGZmVklOUGZmVklOUGZmVklOUGZmVklOUGZmVklOUGZmVklNS1CSZkl6SNL9kmbksoGSpkl6Iv/cJJdL0rmSZkp6UNKuhe2My8s/IWlcofwjefsz87rqLIaZmVVbs8+g9o6IERExMk9PBG6OiO2Am/M0wIHAdnmYAJwHKdkApwC7A7sBpxQSznnA8YX1xnQRw8zMKqzVl/jGApfk8UuAgwvll0YyHdhY0ubAAcC0iFgYEYuAacCYPG/DiJgeqS/lS2u2VS+GmZlVWDMTVAA3SbpX0oRcNjgi5uXxZ4HBeXwIMKewblsu66y8rU55ZzFWIGmCpBmSZvTW7pjNzKqkmV2+7xkRcyW9B5gm6bHizIgISVFmBTqLEREXABcAjBw5stR6mJlZ15p2BhURc/PP54BrSPeQ5ufLc+Sfz+XF5wJbFlYfmss6Kx9ap5xOYpiZWYU1JUFJWk/SBu3jwP7An4EpQHtLvHHAtXl8CnB0bs23B7A4X6abCuwvaZPcOGJ/YGqe95KkPXLrvaNrtlUvhpmZVVizLvENBq7JLb/7Ab+KiN9Juge4UtKxwGzgsLz8DcCngZnAa8AxABGxUNJpwD15ue9GxMI8/iXgl8A6wI15AJjUQQwzM6uwpiSoiHgK2LlO+QJgdJ3yAE7oYFsXAxfXKZ8BfKjRGGa9yfCJ16/yurMmHdSNNTFrXKubmZuZmdXlBGVmZpXkBGVmZpXkBGVmZpXkBGVmZpXkBGVmZpXkBGVmZpXkBGVmZpXkBGVmZpXkBGVmZpXkBGVmZpXkBGVmZpXkBGVmZpXkBGVmZpXkBGVmZpXkBGVmZpXkBGVmZpXkBGVmZpXkBGVmZpXkBGVmZpXkBGVmZpXUr5nBJPUFZgBzI+IzkrYGJgObAvcCR0XEW5LWBi4FPgIsAA6PiFl5G98CjgXeAU6KiKm5fAxwDtAXuDAiJuXyujGatMtm1gsMn3j9Kq03a9JB3VyTNUuzz6C+AjxamD4TOCsitgUWkRIP+eeiXH5WXg5JOwCfA3YExgA/l9Q3J76fAQcCOwBH5GU7i2FmZhXWtAQlaShwEHBhnhawD3B1XuQS4OA8PjZPk+ePzsuPBSZHxJsR8VdgJrBbHmZGxFP57GgyMLaLGGZmVmHNPIM6G/gGsDRPbwq8GBFL8nQbMCSPDwHmAOT5i/Pyy8pr1umovLMYK5A0QdIMSTOef/75Vd1HMzPrJg0lKElf76D8/za4/meA5yLi3pWoW1NFxAURMTIiRg4aNKjV1TEz6/UaPYP61w7KT25w/U8An5U0i3T5bR9Sg4aNJbU31BgKzM3jc4EtAfL8jUiNJZaV16zTUfmCTmKYmVmFdZqgJO0jaR+gr6S926fzcBzwciNBIuJbETE0IoaTGjncEhFHArcCh+TFxgHX5vEpeZo8/5aIiFz+OUlr59Z52wF3A/cA20naWtJaOcaUvE5HMczMrMK6amZ+Uf45ALi4UB7As8CJqxn/m8BkSacD9xXiXQRcJmkmsJCUcIiIhyVdCTwCLAFOiIh3ACR9GZhKamZ+cUQ83EUMMzOrsE4TVERsDSDp0og4ujsCRsRtwG15/ClSC7zaZd4ADu1g/TOAM+qU3wDcUKe8bgwzM6u2hh7ULSYnSX1q5i199xpmZmarp9FWfLtKulPSq8DbeViSf5qZmXW7Rl91dAlwHfBF4LXyqmNmZpY0mqC2Ar6dW8WZmZmVrtHnoK4B9i+zImZmZkWNnkENAK6RdDupefky3dW6z8zMrKjRBPVIHszMzJqi0Wbm3ym7ImZmZkUN
v81c0n6SLpJ0XZ4emV+DZGZm1u0afQ7qROA84AngU7n4deD0kuplZma9XKNnUF8F9s3dqLe/OeIx4AOl1MrMzHq9RhPUBizvELD9Waj+wFvdXiMzMzMaT1B/BCbWlJ1E6srCzMys2zXazPxE4DpJxwMbSHqc1BfUZ0qrmZmZ9WqNNjOfJ+mjpG4rhpEu993tN5mbmVlZGj2DIr+H7648mJmZlarDBCVpDssbRHQoIoZ1a43MzMzo/AzqC4XxjwLjgHOB2aS3m38ZuLS8qpmZWW/WYYKKiD+0j0v6GXBARMwtlN0I/A74Uak1NDOzXqnRZuZbAK/UlL0CDOne6piZmSWNJqgpwJT8Pr7tJe1P6iNqSnlVMzOz3qzRBPWPwJ3A+cCf8s+7cnmXJA2QdLekByQ9LOk7uXxrSXdJminpPyWtlcvXztMz8/zhhW19K5c/LumAQvmYXDZT0sRCed0YZmZWbQ0lqIh4IyImRsQ2EbFORLwvT7/eYJw3gX0iYmdgBDBG0h7AmcBZEbEtsAg4Ni9/LLAol5+Vl0PSDsDngB2BMcDPJfWV1Bf4GXAgsANwRF6WTmKYmVmFdZigJH2qML5PR0MjQSJpv4fVPw8B7ANcncsvAQ7O42PzNHn+aEnK5ZMj4s2I+Cswk/Tw8G7AzIh4KiLeAiYDY/M6HcUwM7MK66yZ+c+BD+XxizpYJoD3NRIon+XcC2xLOtt5EngxIpbkRdpY3uhiCPnltBGxRNJiYNNcPr2w2eI6c2rKd8/rdBSjtn4TgAkAw4b50S4zs1brrJn5hwrjW69uoIh4BxghaWNSA4sPru42u1NEXABcADBy5MguH1A2M7NyNfSqI0kjgAURMadQtiUwMCIeWJmAEfGipFuBjwEbS+qXz3CGAu3PWc0FtgTaJPUDNgIWFMrbFdepV76gkxhmLTF84vWrvO6sSQd1Y03Mqq3RVnyXk+4bFa0FXNbIypIG5TMnJK0D7Ac8Suqu45C82Djg2jw+JU+T59+S3wU4BfhcbuW3NbAdcDdwD7BdbrG3FqkhxZS8TkcxzMyswhp9WeywiHiqWBARTxabf3dhc+CSfB+qD3BlRPy3pEeAyZJOB+5j+b2ui4DLJM0EFpISDhHxsKQrgUeAJcAJ+dIhkr4MTAX6AhdHxMN5W9/sIIaZmVVYowmqTdKuEfGn9gJJuwLPNLJyRDwI7FKn/ClSC7za8jeAQzvY1hnAGXXKbwBuaDSGmZlVW6MJ6izgWkk/ILW+2wb4OnUShZmZWXdotMPC/5D0Iukh1y1JTbq/FhFXd76mmZnZqlmZDguvAq4qsS5mZmbLNNSKT9K5kj5eU/ZxSWeXUy0zM+vtGm1mfgQwo6bsXuDz3VsdMzOzpNEEFXWW7VunzMzMrFs0mmD+BzhdUh+A/PM7udzMzKzbNdpI4ivAfwPzJM0GtiI9A/U3ZVXMzMx6t0abmbflB3N3IzUzn0/qtuJuUnfwZmZm3arhZuakrit2B8YDO5Eu732lhDqZmZl1nqAk9Qc+S0pKB5A6CPw1MAw4LCKeK7uCZmbWO3XVSGI+8O/A48AeEbFDRJwGvFV6zczMrFfr6hLfg8CepEt7T0j6a0QsKr9aZs3hvpnMqqvTM6iIGEV6MexNpJfDPivpOmA93t0/lJmZWbfp8jmoiJgdEadFxHbAaGAesBR4IL/d3MzMrNut1JsgIuL2iJgAvBc4EfhwKbUyM7Neb5VeVRQRb0TEryPiwO6ukJmZGfhdemZmVlFOUGZmVklOUGZmVklOUGZmVklNSVCStpR0q6RHJD0s6Su5fKCkaZKeyD83yeXKvfjOlPRgflFt+7bG5eWfkDSuUP4RSQ/ldc6VpM5imJlZtTXrDGoJ8LWI2AHYAzhB0g7ARODm/IzVzXka4EBguzxMAM6DlGyAU0hvttgNOKWQcM4Dji+sNyaXdxTDzMwqrCkJKiLmRcSf8vjLwKPAEGAscEle7BJSFx7k8ksjmQ5sLGlz0gtr
p0XEwvzKpWnAmDxvw4iYHhEBXFqzrXoxzMyswpp+D0rScGAX4C5gcETMy7OeBQbn8SHAnMJqbbmss/K2OuV0EqO2XhMkzZA04/nnn1/5HTMzs27V1AQlaX3gN8BXI+Kl4rx85hNlxu8sRkRcEBEjI2LkoEGDyqyGmZk1oGkJKvct9Rvgioj4bS6eny/PkX+29y81l9Rzb7uhuayz8qF1yjuLYWZmFdasVnwCLgIejYgfF2ZNAdpb4o0Dri2UH51b8+0BLM6X6aYC+0vaJDeO2B+Ymue9JGmPHOvomm3Vi2FmZhW2Ml2+r45PAEcBD0m6P5f9CzAJuFLSscBs4LA87wbg06QefF8DjgGIiIWSTgPuyct9NyIW5vEvAb8E1gFuzAOdxDAzswprSoKKiNsBdTB7dJ3lAzihg21dDFxcp3wG8KE65QvqxTAzs2rzmyTMzKySnKDMzKySnKDMzKySnKDMzKySnKDMzKySnKDMzKySnKDMzKySnKDMzKySnKDMzKySnKDMzKySnKDMzKySnKDMzKySnKDMzKySnKDMzKySnKDMzKySnKDMzKySnKDMzKySmtXlu5mZdaPhE69fpfVmTTqom2tSHp9BmZlZJTlBmZlZJTlBmZlZJTUlQUm6WNJzkv5cKBsoaZqkJ/LPTXK5JJ0raaakByXtWlhnXF7+CUnjCuUfkfRQXudcSeoshpmZVV+zzqB+CYypKZsI3BwR2wE352mAA4Ht8jABOA9SsgFOAXYHdgNOKSSc84DjC+uN6SKGmZlVXFMSVET8EVhYUzwWuCSPXwIcXCi/NJLpwMaSNgcOAKZFxMKIWARMA8bkeRtGxPSICODSmm3Vi2FmZhXXymbmgyNiXh5/Fhicx4cAcwrLteWyzsrb6pR3FuNdJE0gnbExbNiwld0X6war2mwWelbTWTNrTCUaSeQzn2hljIi4ICJGRsTIQYMGlVkVMzNrQCsT1Px8eY7887lcPhfYsrDc0FzWWfnQOuWdxTAzs4prZYKaArS3xBsHXFsoPzq35tsDWJwv000F9pe0SW4csT8wNc97SdIeufXe0TXbqhfDzMwqrin3oCT9GhgFbCapjdQabxJwpaRjgdnAYXnxG4BPAzOB14BjACJioaTTgHvyct+NiPaGF18itRRcB7gxD3QSw8zMKq4pCSoijuhg1ug6ywZwQgfbuRi4uE75DOBDdcoX1IthZmbVV4lGEmZmZrWcoMzMrJKcoMzMrJKcoMzMrJKcoMzMrJKcoMzMrJKcoMzMrJKcoMzMrJKcoMzMrJKcoMzMrJKcoMzMrJJa2WGhVZg7DzSzVvMZlJmZVZITlJmZVZITlJmZVZITlJmZVZITlJmZVZITlJmZVZITlJmZVZITlJmZVVKveFBX0hjgHKAvcGFETGpxlVaKH5o1sypY1e+iVf0eWuPPoCT1BX4GHAjsABwhaYfW1srMzLrSG86gdgNmRsRTAJImA2OBR1ZlYz6bMTNrjjX+DAoYAswpTLflMjMzqzBFRKvrUCpJhwBjIuK4PH0UsHtEfLlmuQnAhDz5AeDxVQi3GfDCalR3VfSWmK2K65iO6Zjlx9wqIgbVFvaGS3xzgS0L00Nz2Qoi4gLggtUJJGlGRIxcnW04ZrXiOqZjOmbrYvaGS3z3ANtJ2lrSWsDngCktrpOZmXVhjT+Dioglkr4MTCU1M784Ih5ucbXMzKwLa3yCAoiIG4AbmhBqtS4ROmYl4zqmYzpmi2Ku8Y0kzMysZ+oN96DMzKwHcoIyM7NKcoIyM7NK6hWNJKznkjQQICIWtrouZWrFfkoazPK3qsyNiPlrYswctxXHt1f87pbJjSRWkaR+wLHA3wJb5OK5wLXARRHx9poQsyZ+U75cJA0DfgCMBl4EBGwI3AJMjIhZJcTcCPgWcDDwHiCA50jHdlJEvFhCzKbvZ447Ajgf2IjlD60PzXX4UkT8aQ2J2Yrfo1Z9phsBYyj8fQJTy/i9bWrMiPCwCgPwa+A8
YA/SH9rQPH4e8J9rSswcdwQwHXgU+H0eHstlu5YQ707gcKBvoawv6SHr6SXt41Tgm8B7C2XvzWU3lRSz6fuZY9xPet1XbfkewANrUMxW/B61IubRwJP5e+DkPJyfy47uyTG7veK9ZQD+sirzelrMvO2mfrkAT6zKvNWM+fiqzOtp+9lA3Jm9JGZZv0ct+d0FNq5TvkmJ30VNiel7UKtuoaRDgd9ExFIASX2AQ4FFa1BMgPUi4q7awoiYLmm9EuLdK+nnwCUsfxP9lsA44L4S4gHMlvQN4JLIly7zJc3xrPg2/O7Uiv0EuFHS9cClNXGPBn63BsVsxfFtRUyRLknXWprn9diYvge1iiQNB84E9mF5ctgYuJV0rfmva0LMHPdcYBvqf7n8NWreDN8N8dYi3Wsby4rXt6eQ7rW92Z3xcsxNgIk55nty8fwc88wo4UZ3K/azEPvAenEjvXVljYjZot+jVsQcB/wrcBPL/z6HAfsBp0XEL3tqTCeobiBpU4CIWLCmxmzFF5qZNSb/g3UA726wUNqVlWbEdIJaDZLWBbaLiAcKZcOAdyLiXV16lFyX/SJiWjNjliUf1y+TLiH8hHTT+e9JDTO+GxGvNKket0TEPiVuf7OIeKEw/QVSD9B/Bv4jSvrjzC9PnhwRL0jaBvgF8GHgL8BxEfFQCTF/C/wGuLaJn9/7SDfv55KuPJwFfIzU2Oefo7wWdXuTfl+3BN4hHdcLI2JmGfEKcde4xwb8oO7qeRv4bc19mAuBzVtQl4vK2rCkvpL+QdJpkj5eM+/kEkL+EhgMbA1cD3wU+CHp2vZ5JcRD0oM1w0PAJ9qny4hJujzSHv9k4CjgXtJlkh+XFBPgnwqJ8VzgrIjYhNRi8fySYu5OejziaUlXSvrbfDmsTL8kdbfzKqnF6ePAgaR7XheXEVDS90mXvqeTvh+ezMNV+f5xGTFHSJoO3EZKxD8A/iBpuqRde3TMMlp49KYB+DfgmDw+DLivxFhTOhiuA14tMe6FwK+Ar5K+QH9cmPenEuLdn38KeJblZ/oCHizx2F4OfBDYChhOura+Fam3zzJi3lcY/xOpMQpAf+ChEj/Pxwvj99TMK+v43pd/bkhKxDcAz5PO3vZvwvF9uqN53RzzoZyoY90AAAzySURBVMJ4P+COPL4J8OeSYq6xjw24Fd/qu5D0mvlfkP5z+kWJsT4JfAGovUQi0qWhsuwWETsBSPop8PN8yeYIymslRESEpBsi/+bn6VIue0XEZyX9Lemz/LeImCLp7YiYXUa8bB1Ju5CuZPSNiFdzXd6W9E6Jca+W9Evgu8A1kr4KXENqfPN0STHbP8OXgMuAy/J91ENJjVNu6mTdVbVU0vtJDwevK2lkRMyQtC3p2aQyLJU0MFKjmi3a40TEIkll/a00u5Vt02I6Qa2miHhMyftJD+N9ssRw04HXIuIPtTMkPV5i3GWXYiJiCTBB0r+Sno5fv4R4MyStHxGvRMQX2wvz/ZKXS4gHQERcI+km4DRJx1LY75LMY/mlvIWSNo+IefmLe0lZQSPi25LGkx783gZYG5gA/BdwZElh33XfKVIDn/Mp77LiN0hXF5aS3hDyLUk7k87iji8p5veA+yT9BfgA8E8AkgYBD3S24mpYYx8bcCOJbpD/2L9Iukl4RIur0+0kXQ5cHhG/qyk/DjgvIvo3sS6KJvzS5i+yj0VEWV+encXuC6wdEa81O/aaTtJmwKKIKO0MVekdfO8jPYBc2quGamKukY8NOEF1g9zqbB7w9xHx+ybH/kxE/HczY7aCpAsiYkKTY54aEaeu6TFz3FYc394SsyWf6ZrArfi6QUS8FhEbNTs5Zd9tQUwkNbtL6ZFNjgfw2V4SE1pzfHtLzFZ9pq34O+3WmL4H1fOV1kihC83+Q3+uyfGgNce2VZ9nK45vb4lZ6meaLyl2FPfTPTmmL/H1MJI+yIrXfZ8Hro6IR5tcj99FxJhmxmw2SX0iv/NwTY5p5Sr7M80tPmez
YiKMPD0kIrq9sU+zYjpB9SCSvklq2j0ZaMvFQ0mtBydHxKRW1a07qTV9M/Wa/r1adHx7S8xW/B49AYyOiHc9IiBpTkRs2VNjOkH1ILnp6o61v+T5ifyHI2K7kuI29Q9d0lRSE/ZLIuLZXPZe0huhR0fE/t0ZL2//16QO5i5hxeQ/DhgYEYevCTFz3FYc394SsxW/RycAt0fhlWuFeSdGxE96akwnqB5E0mPAAbUPj0raitSp3gdKitvUP3RJj3e0L53NW82Yf4mI96/svJ4WM2+7Fce3t8RsyWe6pnIrvp7lq8DNkm6UdEEefgfcDHylxLjDI+LM9uQEEBHPRsSZpFcBdbfZkr6h9CJKIL2UMl/iLKtvpoWSDlXqX6s9Zh9Jh1Ny/15NjgmtOb69JWarPtMVNLv1nqSRKuHdik5QPUh+UPb9wHdIXZRPBU4FPlD7EG03a/Yf+uHApqSXTy6StJD0UsqBwGElxIN0H+8Q4FlJf8mXU58F/i7PKzPm/BzziSbEhNYc394SsxW/R/U0rZWtpM2B/yW9tqp7t+1LfNYVdd6Z36Qooc+Z3FpxKDA9Ct0zSBpTVjKWtDvp/tqTpJfGfgx4pMyn8QuxN82j50TEF8qOVxP7k6R3OT4UEWW8E6/92D4WEYuVHmyfCOwKPAx8LyIWlxDzJOCaiCjrbKlezLVIDZmeIb0AeAzwCdJ+XlBWw5c69WhaK1tJE0mvzNo2Ivbu1m07QdnqkHRMRHTrC3LzF8sJpH57RgBfiYhr87w/RUS3dyEg6RRSVwz9gGmkL+zbSF1fTI2IM0qIOaVO8T6k+31ERCkPeEq6OyJ2y+PHkY71fwH7A9eV0RpU0sPAzhGxJF9+epXUP9ToXP53JcRcnOM8SXob/1VR6H+rDJKuIP0OrQMsBtYjvYh3NOn7dlyZ8Vshf7Z7kf5hPSoinuy2jUcJr2L30HsGarox6KZtPgSsn8eHAzNISQpK7CaB9ObpdYGXgA1z+TqU1wXFn0hdfIzKf+CjSK/M2gvYq8TPrNgNxT3AoDy+HiV18wE8Wtzvmnn3l7WfpNsY+5P6S3ue9CLTccAGJcV8MP/sR7rK0DdPl9lVzEbAJFKHnguBBaR/7iYBG5f1e5Rj7w38Jo9PIJ0Nd9v2fQ/KuqR3d+ZX7NRvcJcbWHl9Il/Wi9Tr6SjgQEk/pryn8pdExDuRXtD6ZKRuIYiI10lvwy7DSFL/Wt8GFkfEbcDrEfGHqPPG+m7UR9Im+bKiIuJ5gEjdfZT1FvU/Szomjz8gaSSAUi8AZV32iohYGhE3RcSxpOeSfk667PZUSTH75Mt8G5D+2dkol69N6uerDFeSGmCMioiBEbEpKXEsyvPK9EWWd5Y6GVihgcjq8quOrBGDgQN4dyskkW6Odrf5kkZExP0AEfGKpM+QekH9cAnxAN6StG5OUB9pL1R6BqyUBBXp7QJnSboq/5xPc/4mNyIlRgGh5d18rE95/wAcB5yj1HPwC8CdkuaQGtkcV1LMFfYl0v2fKcCUfB+sDBeRzmT6kv7xuErSU6SO/CaXFHN4pBa1y0RqcXumpC92sM5qk7Qx6T7t0TnmS0q97H4a6JYXWPselHVJ0kXALyLi9jrzfhURn+/meENJZzTP1pn3iYi4ozvj5e2uHRFv1infDNg8Ih7q7ph1Yh0EfCIi/qXsWB3EXxcYHBF/LTHGhsDWpETcFhHzS4z1/oj4S1nb7yTuFgAR8Uz+Et+XdCn87pLi3QT8nvSc4vxcNhgYD+wXEfuWEbcZnKDMzHqwQivbz7L8knt7K9szI/Xu2yM5QZmZ9XBKvU3/HalX23eAx4Fftd9L7ancSMLMrAfLj2WcR2qIMRJYi5Sopksa1cKqrTafQZmZ9WC5Ne2IiHgn30e8ISJGSRoGXBsRu7S4iqvMZ1BmZj1fe+vPtYH1ASJ1hVFW0/amcDNzM7Oe7ULgHkl3AZ8EzgSQNIj04G6P5Ut8ZmY9nKQdge2BP0fEY62uT3dx
gjIzs0ryPSgzM6skJygzM6skJygzM6skJyjrFSS9UhiWSnq9MH1kq+u3KiTNklTae9YkjZf0Tj5GL0l6IL+016wpnKCsV4iI9dsH4GngbwplV7S6frUklf4ISIMx7szHbGNSVxWT8wtQzUrnBGW9mqQ+kiZKelLSAklXShqY5w2XFJKOkTRH0iJJ/yjpo7k/rBcl/bSwrfGS7pD0U0mLJT0maXRh/kaSLpI0T9JcSadL6luz7lmSFgCnStpG0i25Xi9IuqI9OUi6DBgGXJfPcL4haZSktpr9W3aWJelUSVdLulzSS8D4zupUlLsGuYzUqeF2eXsd1q8Q++v5WC2W9J+SBhTmfyPHfUbScflYb5vnrS3p3yQ9LWm+pPMlrbOaH7f1ME5Q1tudCBxM6sV2C1KfVz+rWWZ30pfy4cDZpH5+9gV2BA6TtFfNsk8CmwGnAL9tT3jAL0kdAm4L7ELq6fW4mnWfIr2R+gxSf0bfz/XanvR+tVMBIuIoVjwT/EGD+zsWuJp0RnRFA3UCICetY0idC85uL+6ofgWHkToI3BrYidQFBJLGAP+XdBy3JXVKWTQJeD8wIs8fAvxrg/toa4oyuwP24KGKAzAL2DePPwqMLszbnPQl3I/U3XwAQwrzFwCHF6Z/A3w1j48HniE/X5jL7gaOIiWdN4F1CvOOAG4trPt0F/U+mBW7a1+2H3l6FKmPpY729VTgj4V5jdRpCfBiPiavA4etZP2+UJj+AXB+Hr8Y+H5h3rb5WG9LSnyvAtsU5n8M+Gurf3c8NHfwq46st9sKuEZSsdfcd1ixK/tip3qv15levzA9N/I3ajabdIaxFem9aPOkZR299iH1KNuuON7e6dw5pNfXbJCXr+3VeGUVYzRSp+kRsadSb7sX5bpcuRL1K3Y6+RrpWJB/zuigXoNI3aXfW6iXSL3UWi/iS3zW280BDoyIjQvDgIiYu4rbG6LCtyrpPtEzOc6bwGaFOBtGxI6FZWtf6/K9XPbhiNgQ+AIrdmNeu/yrpC92YNlluUE1yxTXaaROaaWIV4B/Ao6S1P527K7q15l5wNDC9JaF8RdIiX/HQr02itRYw3oRJyjr7c4HzpC0FaQXbEoauxrbew9wkqT+kg4l3Zu5ISLmATcBP5K0YW6csU3N/ataGwCvAIslDQH+uWb+fOB9hem/AAMkHSSpP3Ay6e3Wda1snSL1zHohy+8FdVW/zlwJHCNpe6UuIv5fIc5S4D+AsyS9B0DSEEkHrMT2bQ3gBGW93TmkrrFvkvQyMJ3UWGFV3UVqUPECqaHDIRGxIM87mtSZ3COkS2FXk+55deQ7wK7AYuB64Lc1878PnJxbE349IhYDXyIlkbmkM6o2OreydTob+LSknRqoX4ci4kbgXOBWYCbpuEM6owP4Znt5bnH4e+ADjW7f1gx+WaxZN5E0HjguIvZsdV16GknbA38G1o6IJa2uj1WDz6DMrCUk/W1+3mkTUh9G1zk5WZETlJm1yj8Az5GeG3uH1AjDbBlf4jMzs0ryGZSZmVWSE5SZmVWSE5SZmVWSE5SZmVWSE5SZmVXS/wdOmwflqSSj6gAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "\n",
    "#ax = df_temp_range_count['count'].plot(kind='bar', title =\"Accident based on Temperature\", figsize=(15, 10), legend=True, fontsize=12)\n",
    "ax = final_temp_acc.plot(x=\"TemperatureRange\", y=\"count\", kind=\"bar\")\n",
    "ax.set_ylabel(\"Accident\", fontsize=12)\n",
    "ax.set_xlabel(\"TemperatureRange\", fontsize=12)\n",
    "ax.set_title(\"Temperature vs Accidents\")\n",
    "rcParams.update({'figure.autolayout': True})\n",
    "plt.tight_layout()\n",
    "plt.rc('xtick', labelsize=12) \n",
    "plt.rc('ytick', labelsize=12) \n",
    "plt.rc('font', family='serif')\n",
    "plt.savefig('/Users/pprusty05/google_drive/Data_Mining/Project/plots/Temp_vs_Accident.pdf')\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## States vs. Accidents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_state=df.groupby('State').count().toPandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "top20_state=df_state.sort_values('count').tail(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3debxVZb3H8c9XQMmhMECyHFBM1ApJUXO4Sqk5YIk2WJZ1MofmW9cc06K0MrXsXm+KRoaZaJmml8wsFTBNE0hFUyw1UnICU9QMRfjdP55n42Kzz977HPYk5/t+vfbr7LWeYT17OOu3n2c9ay1FBGZmZp1mjXY3wMzMrBIHKDMz60gOUGZm1pEcoMzMrCM5QJmZWUdygDIzs47kAGV9iqRNJV0t6SZJN0iaLumz7W5XLZImSvpYk+r+iKRnJK3ZoPq6baukyyUtljS2Eduy1ZsDlPU1k4HrImL3iNgTmAB8qpQoabKkCfVWlgNcV4PbWMmXgSlNqvtAYACwT4Pq67atEfEB4PFGbETSWEnzGlGXdSYHKOtrdgSmlxYiYjpwSbsaU6+IeD4iXm50vZJeBywFpgIfbESdzWqr9T0OUNbX/B04TtI6pRURcTqApP8E9gW6cs/ok3n9uYXhwEslvTav/zYwGjghp43L6/eRdKukGZKmSnpjpYbkMi9Jujwv7y3pj/n59pLulzRb0mGS5kmanNPWlvTzXP/Nks4u1DlC0m9z2u8l7VLj/TgQuAK4FDhQ0lplbfyIpNsk3Zgfe+b1/SWdLukPebj055I2L29rzrufpDn59Z5U4X34WN7GDElTCu/vuXno8dQ8NPgXSd/KaSOB7wNvyPVeXuN12qtRRPjhR595AHsCTwHPAD8G9ihLnwxMKFv3hcLzCcCpheXpQFdheTPgOWBkXv4scH03bVkH+Dfwxrx8NrAEGJqXzyk8nwBMzs8/A5yXn/cDZubn/YH7gMPz8ihgIbBelffjEmAgsCbwNDC+kLYLaTiu1Ib3F9pwEvA7oF9e/t/S+1DW1iHA88DOefm9+TWOzcu75jaWtnEmMKns/b0GELBhLlt6v8YC89r9nfKjeQ/3oKxPiYgbgE1Ix0mGA9MknV+j2OLcG5kBfAjYvkreQ4FZEXF/Xp4C7Clpwwpt+RcwAxiXV20IzCL14iDttBdU2MY/gf+QtFNELAX2yOt3AkYAF+f65wD/AA6o1FBJg4DnI2JxRLxE6kkdUsjyCeDXhTZcBZxXSLs4bx/gW/m1lBsHPBERt+Y2/R8pKJd0AVML25gCfESSCnmui+Qx0o+L4ZVej61++re7AWatlgPDJGCSpD2AGyV9JyIeKs+bZ5t9F3hbRMzLEyK6qlS/EbCNpOmFdX8Hhkn6OK8En9Mj4jek3sEBOf9fgHuAcZLuys8rtf8ySf2B/5Y0GPgeKXBsBATwu8L+fS3gdd20dTywc6Gt6wObS3pNRPw71zensN2XgT8WXueCQtqj3WxjQ1IPqeifhefl71d/4AlgcKHcs4X8i0m9PesDHKCsT5F0XkR8urQcETMkPUX3O/EdgfsjYl5eHlBjE4+QelClXhGS1geejYg7gdPL8v8K+DZwUH6+BPgv4M+kiQuVXsMQ4GcR8VNJbweulzQ3b3tJRIwt5F0HWNZNW/cBto+IJTnvmqSgMw74Ra5vaKGu/sBbIuKuCmmDSUOJ88q28VgxX/b6wvNHgIciYvlUf0lDIqI8qFkf5CE+62v2krRjaSH3oJYBc/Oq54C1Ja0j6RLgAWCLvAOGladil/K/WdKZpMkGO0naNNe/Aek4SsX/tYj4G/AwqVc2MyLuAF4APpADWiWf45VhwbtJPZJ+pN7Nw5IOztvuTxqW27K8ghw0Xy4Fp9yWl4Bf88psvsnA/jkgQhr+6yqkHSapX14+Hdi2QluvATaQtGve7oHAuoX0yaQe4/o5fSTdBOYKngPWzuXO
kbRxneXs1aLdB8H88KOVD+BI0rGSacBNpODxjkL6zqRgdTvpeNIapOHAB4ArgZ+QJlickfMfBNyf878zr3s3cEuu+8Zi/d206QzgwsLyBcD5heXDgHmkCQtfAd4B3JDrngV8u5B3BPCb/Bp/T54wUba91wF3Ag8B4wrrxwEPkobRzs3rPgrcml/LL4DX5vUDSEHpVuBm4JuV2prX7U8KpDeRjlXNy9vfsbCN2/Lr+Q2wZeF9eSZ/HjsD5+a23QlsQxoBujG34XJA7f5++dHYh/IXwczMrKN4iM/MzDqSA5SZmXUkBygzM+tIDlBmZtaRfB5UDw0ZMiSGDx/e7maYma02Zs+evTAiys+Xc4DqqeHDhzNr1qx2N8PMbLUh6e+V1nuIz8zMOpIDlJmZdSQHKDMz60g+BtUAS5YsYf78+SxevLjdTWmLgQMHstFGGzFgQK3rqJqZ1c8BqgHmz5/Peuutx/Dhw1nxNjarv4jgqaeeYv78+Wy22Wbtbo6ZrUY8xNcAixcvZvDgwX0uOAFIYvDgwX2292hmzeMA1SB9MTiV9OXXbmbN4yG+Jhh+wjUNrW/e6eNqZzIzW804QFld5s2bx5133sn48ePb3RQz62C1fqD35Ae3h/isLvPmzeOqq65qdzPMrA9xgFqNfOMb3+Dkk0/m1FNP5f3vfz+PPvooRxxxBGeccQZHHnkkt9xyC4sXL6arq4uuri4ALrjgAkrXFpw6dSpbbLEFX/rSlzjiiCPYZZddmDdvHi+99BKTJ0/mzjvvZMKECcydO7f7RpiZNYgD1Griuuuu47bbbuO0007jlFNOYb/99uOYY45h33335bjjjuO0007jkEMOYa211loenACOOuqo5c/f8573sNtuu7HVVlsxadIkDjroIK644grWXHNNurq6GD16NBMmTGCrrbZqwys0s77GAWo1MWfOHLbYYovly5/85CeZM2cOm2++OQDDhg1j0aJFLFy4sGZdW265JQBDhw7lueeea06DzcxqcIBaTWy77bY8+OCDy5cvvPDCFdY9/vjjDBo0iCFDhrDeeuvx7LPPAvDwww+vVFelaeP9+vUjIli8eDH3339/k16FmdkrPIuvCdoxLfzd7343t912GyeeeCIDBw5k8ODBnHXWWXzlK1/hr3/9Kw888ACXXXYZkhg9ejTLli3j1FNPZfjw4SxatIhLL72UESNGMGfOHC6++GJGjhzJ1KlTefrpp3nggQd4y1vewvz58znmmGM44IADGDlyZMtfo5n1LYqIdrfhVWXMmDFRfj+o++67j6233rpNLeoMfg/MDHo3zVzS7IgYU77eQ3xmZtaRHKDMzKwjOUA1SF8eKu3Lr93MmscBqgEGDhzIU0891Sd31KXbbQwcOLDdTTGz1Yxn8TXARhttxPz581mwYEG7m9IWpRsWmpk1kgNUAwwYMMA36zMza7CWDvFJOkLSLZJuljRH0h55/WhJt+a0qZIGF8pI0pmSZkqaLemwsjqbUtbMzNqrZQFK0geAPYHdI2I34GzgDZLWBK4GToqIXYE/ARMLRY8Gtgd2AvYBzpI0KtfZzLJmZtZGrexBfRX4RkQsBYiIH0fEz4D9gKURMS3nmwQcLGloXj4amBwRyyJiIfAr4Mic1syyZmbWRi0JUJI2ALYGRkuaJun3ko7OyTsAy+/fEBGPAC8A20laCxhVTAfuBcY0s+yqvVozM2uEVk2SGA4IOAjYC9gAuF3SImAYsKgs/zM5zxBSEF1UIY0mljUzszZr1RDfWnlb50TE0oh4DLgYODynVzqBqHhJ7fL0ammNLJtWSEdJmiVpVl+dSm5m1mqtClBP579PFNbNBzYCngQGleUflNcvBJaVpZfSaGLZFUTEBRExJiLGDB3qQ1RmZq3QqgD1V9LxneLw2VDgUWAmsPzeDZI2BtYGZkfEi8DdxXRgm1yGZpXt7Ys0M7PGaUmAysHiJ8AnASStAxwCXARcC/QvnRNFGva7MiJKY2kTga58
TtNgYBxpxh1NLmtmZm3UyitJfBmYKGk28DIwGfhpRISk8cC5kpaShgO7CuXOB0YAt5MC6rERcRekwNfEsmZm1kYtC1AR8S/gsG7S7gB27iYtgGOr1NuUsmZm1l6+mrmZmXUkBygzM+tIDlBmZtaRHKDMzKwjOUCZmVlHcoAyM7OO5ABlZmYdyQHKzMw6kgOUmZl1JAcoMzPrSA5QZmbWkRygzMysIzlAmZlZR3KAMjOzjuQAZWZmHckByszMOpIDlJmZdSQHKDMz60gOUGZm1pFaEqAkTZY0veyxbiF9tKRbJd0iaaqkwYU0STpT0kxJsyUdVlZ3U8qamVl7tawHFRFjyx7PA0haE7gaOCkidgX+BEwsFD0a2B7YCdgHOEvSqBaUNTOzNuqEIb79gKURMS0vTwIOljQ0Lx8NTI6IZRGxEPgVcGQLypqZWRu1LEBJmijp95KulfTOQtIOwNzSQkQ8ArwAbCdpLWBUMR24FxjTzLKr8jrNzKwx+rdoO/cBN0bETEk7ANMk7RYRdwLDgEVl+Z8BNgCGkILoogppNLHsCiQdBRwFsMkmm1R9oWZm1hgt6UFFxHciYmZ+PhO4hjT8tjxLhWKqkl4trZFlU6aICyJiTESMGTrUI4BmZq3QrmNQDwOb5udPAoPK0gfl9QuBZWXppbRmljUzszZr1TTz48pWDQMezc9nAiMLeTcG1gZmR8SLwN3FdGCbXKZpZXv+Cs3MrNFa1YP6L0kbAEjaDDgQuDinXQv0l7RHXj4cuDIiFuTliUBXPqdpMDCONOOu2WXNzKyNWjVJ4izgl5JeBtYBPhcRMwAi4kVJ44FzJS0Fnga6CmXPB0YAt5MC6rERcVcLypqZWRu1JEBFxFmkINVd+h3Azt2kBXBsq8uamVl7dcKJumZmZitxgDIzs47kAGVmZh3JAcrMzDqSA5SZmXUkBygzM+tIDlBmZtaRHKDMzKwjOUCZmVlHcoAyM7OO5ABlZmYdyQHKzMw6Uq8DlKQ3NbIhZmZmRXUFKEk/q7D6+G7Wm5mZrbJ6e1BDy1dExBeAjRrbHDMzs6Tq/aAk/Q0I4A2SHipLXhu4p1kNMzOzvq3WDQu7AAFnA18sS3sOuKsJbTIzM6seoEq3ZZd0SET8pTxd0lbA3Ca1zczM+rC6jkFFxF8kvUHSOyTtXnoAU3q6QUlvlrRE0tjCur0kzZR0m6SLJA0spA2UNDmnzZL07rL6mlLWzMzaq95ZfN8EHgZ+CVxUeGzVi21+A3ipUPdQ4DLgIxHxjrz61EL+CYBy2qHAZZKGtaCsmZm1Ub2z+N4PbBIRG0bEZqUH8OOebEzSDsDzwILC6kOBOYUhxEnAEZL6SVoDOAL4EaSeHHAH8NFmlu3JazIzs+aoN0DdFRGPV1j/kx5u7+v5UbQDKx7HuhcYBGwBbA4MrpA+psllzcyszWrN4iuZK2kqcB3wbGH9CcA29VQgaT/gzxExX1IxaRjwSGH5mfx3A2BZfr6oLL20zWaVvb+s7UcBRwFssskmK784MzNruHoD1OeBO4H3la1/Qz2F83Db8cDB3WSJSsWqpFdLa2TZlCniAuACgDFjxlQqY2ZmDVZvgJoSEZ8tXynpO3WWPxS4LiL+WSHtSdLQWsmgwvqlhXVPVnjerLJmZtZm9U4zXx6cJL2+sP74OrfzH8ABkqZLmk7qeX1f0tXATGBkIe82pOG2B4AHgX9WSJ+ZnzerrJmZtVm908zXlfRDSS8AMyUNlvQHSSPqKR8RR0fErhExNiLGAo8DX4yIA0nnUm0r6c05++HApIh4OSKWAT/M68h5RgOX5LxNKVvPazIzs+aqdxbfRFJvZFfg0Yh4CvgEcE5PNiZpx7Ie1Fcj4kngQ8AUSbcB/YBTCsUmpKK6DbgU+HBpRmGTy5qZWRvVewzqTRHxUQBJSwAi4n5Ja/ZkYxFxOzC2wvrrgeu7KbOYdE3A7upsSlkzM2uventQA4vHngAkDQLWaXyTzMzM6g9Q55HO
hfoRsJmkc4A/0cMhPjMzs3rVO4vvJ8BBpKnb9wFrkq5h1+OLxZqZmdWj3mNQRMQtwC3FdZLGRMSshrfKzMz6vG4DVL6dRi3fB7ZrXHPMzMySaj2o60jnK0G6JNBGwL9J081fD6wL/L2prTMzsz6rWoCaGhEfBJB0InBPREwtJUo6kHRFcDMzs4brdpJEKThlexeDU06/GtitWQ0zM7O+rd5p5oMl7VhcIWln0lCfmZlZw9U7i+8E4AZJjwMLgaGk+yZ9sGopMzOzXqorQEXEtZI2A8aRrqP3GHBNviafmZlZw/XkPKiFwEXFdZJOjIhvN7xVZmbW51U7D+qjpBsVLpN0YTfZ9gUcoMzMrOGqTZLYFRiYn7+TdM5T+ePFprbOzMz6rG57UBHx6cLiKRHx0/I8kh5sSqvMzKzPq3ea+S2SPiZpUwBJm0p6T6WgZWZm1gj1BqjTgTG8MqT3HPAeSac2pVVmZtbn1TuLb1hEHFJaiIh/AkdJuqk5zTIzs76u3h7UgPIVktagB9PUJX1e0g2SbpR0t6TPF9JGS7pV0i2SpkoaXEiTpDMlzZQ0W9JhZfU2payZmbVXvQHqJknXSfqopH3yFPRrgek92NYRwIcj4l3AwcDZknaUtCZwNXBSROxKulPvxEK5o4HtgZ2AfYCzJI0CaHJZMzNro3oD1MnADOCrwC+BU0jB6as92NZhEfEkQET8FXgaGA7sByyNiGk53yTgYElD8/LRwOSIWJZPFv4VcGROa2ZZMzNro3pv+b40Ir4VEVtGxNoRMTIivh0RL9e7oYiYU3ou6X3A88BvSbfsmFvI9wjwArCdpLWAUcV04F7ShA2aVbbe12RmZs1TV4CS9C5JF0raIS+PkvRdSev2ZGOS3ibpXuB7wIci4hlgGLCoLOszpIvRDsltXFQhjSaWLW/3UZJmSZq1YMGCWi/TzMwaoN5JDicB55KO0wD8GZgNXEgPrmgeEXcD20h6O3CtpPeWkipkV7FoD9IaWTZlirgAuABgzJgxlcqYmb3qDT/hmqrp804f16KWJPUegyIiroyIpfn50oiYQuql9FhE3AFcA3wBeBIYVJZlUF6/EFhWll5Ko4llzcyszeoNUOtIWuHmhHl5YDf5VyBpcD7uVPQvYB1gJjCykHdjYG1gdkS8CNxdTAe2yWVoVtl6XpOZmTVXvQHqf4E/S5ok6duSJgH35PX1WA84RdLaAHmm3HjgBtJ09f6S9sh5DweujIjSwZ6JQFc+p2kw6Z5Uk3JaM8uamVkb1XvDwksk/Q3oArYFHgbeB3wEmFJHFY8DU0l35X2JNJR2IXBuvp3HeOBcSUtJ08+7CmXPB0YAt5MC6rERcVdu14tNLGtmZm3UkxsW/gH4g6SBwP7AF4H3AJ+ro+xi0rlTp3STfgewczdpARxbpe6mlDUzs/aqd5r5AEnvlXQJ8ATwI2Ap8GgzG2dmZn1XtTvq9iddHuiDpONFLwNXAg8BO0bEEkn7t6SVZmbW51Qb4luQ06cChwHX5qB0Y0QsAYiIX7egjWZm1gdVG+L7HFAKQP2pcAKrmZlZs1S75fslwCWSXgscCFwo6Xng9ZLWyLPv9ouIa1vVWDMz6ztqzuKLiGeBi4GLJb2OdDzqp5JeBnYBtmhuE83MrC+qe5o5QEQsAi4CLpK0PnBjU1plZmZ9Xt3X4isXEU8De9TMaGZm1gu9DlCwfPjPzMys4VYpQJmZmTWLA5SZmXUkBygzM+tIDlBmZtaRHKDMzKwjOUCZmVlHcoAyM7OO1KMrSZiZWWcafsI1VdPnnT6uRS1pHPegzMysI7UkQOU78n5R0nRJMyTdKmnPQvrovO4WSVMlDS6kSdKZkmZKmi3psLK6m1LWzMzaq1U9qDcB/wkcGBF7AF8Frpb0JklrAlcDJ0XErsCfgImFskcD2wM7ke7we5akUQBNLmtmZm3UqgD1HPDVfDV0IuJ3wGLS7Tr2A5ZGxLScdxJwsKShefloYHJELIuIhcCv
gCNzWjPLmplZG7UkQEXEUxFxcWlZkoA1SbeV3wGYW8j7CPACsJ2ktYBRxXTgXmBMft6UsqvyWs3MrDHaNUliD+DvwE3AMGBRWfozwAbAEFIbF1VIo4llzcyszVoeoCQNBL4FdEXEsrw6KmUtPC9Pr5bWyLJphXSUpFmSZi1YsKBCETMza7SWBqg8tHcBcHZEzM6rnwQGlWUdlNcvBJaVpZfSmll2BRFxQUSMiYgxQ4f6EJWZWSu0ugf1XeD2iLhc0lqSNgFmAiNLGSRtDKwNzI6IF4G7i+nANrkMzSrbgNdpZmarqGUBStLxpCtXTJa0LjACOBy4FugvqXT7+MOBKyOiNJY2EejK5zQNBsaRZtzR5LJmZtZGLbnUkaQtgdPz4ucLSV+PiBcljQfOlbQUeBroKuQ5nxTMbicF1GMj4i6AJpc1M7M2akmAioi/UGHyQSH9DmDnbtICOLbVZc3MrL18LT4zM+tIDlBmZtaRHKDMzKwjOUCZmVlHcoAyM7OO5ABlZmYdyQHKzMw6kgOUmZl1JAcoMzPrSC25koSZmXVv+AnXVE2fd/q4FrWkszhAmZmtAgeX5vEQn5mZdST3oMysT6rV8wH3ftrNPSgzM+tIDlBmZtaRHKDMzKwjOUCZmVlHcoAyM7OO5ABlZmYdqaUBStIOkh6Q1FW2frSkWyXdImmqpMGFNEk6U9JMSbMlHdaKsmZm1l4tC1CSDgK+BCwqW78mcDVwUkTsCvwJmFjIcjSwPbATsA9wlqRRLShrZmZt1Moe1MyIOBR4rmz9fsDSiJiWlycBB0sampePBiZHxLKIWAj8CjiyBWXNzKyNWhagImJ+N0k7AHML+R4BXgC2k7QWMKqYDtwLjGlm2Z6+NjMza7xOmCQxjLJhP+AZYANgCKmNiyqkNbPsCiQdJWmWpFkLFiyo4yWZmdmq6oQABRAV1qlKerW0RpZNmSIuiIgxETFm6FCPAJqZtUInBKgngUFl6wbl9QuBZWXppbRmljUzszbrhKuZzwQ+VFqQtDGwNjA7Il6UdDcwErg9Z9kml2la2Ya+OjNbQSOuIu4rkfcNnRCgrgX+W9IeETEDOBy4MiJKB3smAl2Sfgq8HhhHmjLe7LJmVsaBwVqpZQFK0vbAd4HRwAmS3hsRB+eeznjgXElLgaeBrkLR84ERpF7QGsCxEXEXQJPLmq1WHFzs1aZlASoiZgNju0m7A9i5m7QAjq1Sb1PKmnUSBxfrizphkoSZmdlKOuEYlNlqzb0fs95xD8rMzDqSA5SZmXUkD/FZR6o1LFbPkJjPtzF7dXOAsoZrRHAxM3OAshU4uJhZp3CAaoBWDEe1qg4zs07hSRJmZtaRHKDMzKwjOUCZmVlHcoAyM7OO5ABlZmYdyQHKzMw6kgOUmZl1JAcoMzPrSA5QZmbWkRygzMysI/X5ACVpoKTJkm6TNEvSu9vdJjMz87X4ACYAioh3SNoSuE3S1hHxRJvbZWbWp/XpHpSkNYAjgB8BRMRfgDuAj7azXWZm1scDFLA5MBiYW1h3LzCmPc0xM7MSRUS729A2knYFbgYGRsSLed2pwC4RsWch31HAUXlxJHB/jaqHAAtXsXmuw3W8GuropLa4jldvHZtGxNDylT4GlZRHaa2QGHEBcEG9lUmaFRGr1AtzHa7j1VBHJ7XFdax+dfT1Ib4n899BhXWDCuvNzKxN+nqAehD4J2nYrmQbYGZ7mmNmZiV9OkBFxDLgh8DhAJLeDIwGLlnFquseDnQdruNVXkej6nEdrmMlfXqSBKQTdYGJwFakY3InRcRv29sqMzPr8wHKzMw6U58e4jMzs87lAGX2KibpiHa3waxZHKD6AEknt7sNjVJrhyzpbVXSNpT0NUlfbXzLek7S2qtQdqikTwHfaFBbDliFsm+V9DFJH2tEW1ZVre+7pC+2qi2vBpIG1ZHnI61oSzkHqF6SNKUBddT80CX93yrUv52k7wD/VSPfcXXUVTWPpF/2sHk90oMd8l2S
fiTp9RXSngNmAB+ssa1Ne9nMnvpVTzJLer2kIyX9DvgHcAowYFUaIKmfpH2B/1mVekgnt3+2xrZq3ilgVe4mUO/3HfiKpA/3djt5W1tLulLSFZLWKUs7p56AL2kdSe/Nj/5laftJWncV2/hhSdMk3V0j61U16tkM+PqqtCXX84uelvGVJHpvf0k31cgTwNPAbOB/ImJRWfpJkn4WES9XKpy/5O/sSaMkjQIOAT5Autbg7cC/ahT7TB3/DIcBZ1RJHyvpRtKOqjTzpvgcIIqXkKolB5n3kQLKHsACau+Q5wE7A3MlHRcRkwsbfx6YLum5GnX8GHhXve3sjqTdSdd1nA/8Ip/WULrE1heB3eqoYxBwEOkzfSfwMOn/dreIuF3SSb1o1xrAnqT39SBgXWBJT+spiYh7gHvq2Cl/R9ISyq7UUp4HqHsWbS+/77OBgZKuAv47IqZVqjci5lSp4yhgIPDFiCjf3mXA9yX1j4hqO/+PA98CfgBcW5a2C/A9SftGxN9rvJ6KIuJS4FJJl9fI+jZJk0gzmJdfpCDPcD4ROBZYqzdtkLQR6Xt2CL24xqkDVO89DdxQR751gN2B7YHxZWmDSF+gQyNihR2EpCOBc4Eza21A0ltIX4IPAlsA04GlwJsi4glJh9eo4nXAfxSWtwXuqpCnmjsjYqWduqQRpKvFbw18qkYdjdghzwP2AY4DfiCpC/h0RNxXyFNr6urbqwTboaSTuZ+LiG6HRiSdQNr5PJXbv7ekC0k9le1I352qvQVJvyYFyoXAz4FTImKmpGkRcTtARHyrxmsp1SVgLOl9PQh4DWlH/hngN8Bp9dRTQ633dVtq/8/UnFa8qt/3iNg313Mp8KV8rc2zWfF6ceeTfuh0Zydg7wrBiYi4JfdKr6B67+QDwLsi4k8V6jhF0s2kH4WHVHs9daj1nl4GfJcUVG8h7XcOzuvWAf4T+FC9G5O0Iem1HUJ6n+4Hrs519bDlEX704gFc1MP8N1dYN560A/o5MKCw/pvAS8BRddT7Z9I/5x+ALwDD8vppvX0tlcrWer3AxhXWfQl4HvgZMKSOdvwaWEzqcXwP2KEXr+XGwvPNc50vkoLFWnn9rTXquKKb9YeSenHXA5vUqOMBYOfC8n7A44dSpFwAAA8jSURBVKQhxjF1vpYfAn8h7SCGVnqNddbzA+Ax4AXgStLO4zU9fF8n15Hn5zXSa26vVp5GfN/L6htB6rEty/UuLT2v93vW2zzAjAbUMXZVP5eyvCcDTwAvk35Y1vy/zeWGkYZ4b8rv4T3AV4DbCnm26+nn4x5U7722nkx5OOVgYKUD4pG7//nX7SWSPgGcRwpcB0ZEebe/kinA3qRhi5vjlRst1n2CW0R8vHxVHXnK0x8pPZe0FWmYbARweET8vM6m/IPUY/oBMCUiFnTXnnpExEOkodj3kX4hf0jS52vVFxHvKy5LegPpZO53AcdHxHl1bP6JiLi1UOe1kp4G9oqy3nKVdhwpqR+wF3CapPWA6ygMc0raMCIeq1HVlkA/0rDUZZGHlCX15H0dX8eQ9lY10u+pYzvX10hf5e+7pGOAC4GvkXr1s0if7d9KWYBLa1RTz5BXrUkwy+qoo1+N9LMl/RfVh003qFZB/p/4Cen47meAW0g/EhdSe7i05B/AM6SrRnwu8vCopOVD+lGhp1iLA1TvdXfMhcJykH45v4k0jrsCSVMi4tCIuCHFKObmOnaPiDuLebprRER8E/impJHAByWdCNxN4Z9D0tsj4o4evLZqX/buC6VgfALp4P21pCBb94V3G7RDfo2kjUkB4qVc7xWSfgOcShpyqWfHUNreJ0i9ubuAbSPibzWKlLxYYd1jxeAk6TeRh5u62fbkiOgivQfX5QPpe5OOr/2UdKuYI6gxth8Re0saQjqed6mkJ0i99uWfsyRF/pnbjXqGtAfXSD8gB+lqDiONIFTUoO/7ccBJpB3woRFxZXmGPCGnmpslfQs4OfKxxUJZ5ddQK6A/
K2n7iJhdKVHSGNLEnmoaMWx6Iqnn9CLw0Yj4Wd7+O4ErJP0v8IVq31XSMeIPAG8DFkpaGBGP1thuTb6SRC/l4wArTWAoP+YSEd3ObpP0DFA8ELsl8CxpKKjkbRGxfg/btjXpy/JW0gVx3xMRb62S/+KIOKywfGOUHU8qz1OhjlGkX6XDgc9HOkBbnufkiOj2WIeknSLij4Xl0g75YNKQ1M3AEVHl0v2Sfkz6hzwjIuZWSB8FfC8i9uqujpxvY2ASsCvp4HGPZrlV+GwBRpWtq/rZSvonaSf3a9Kw41OFtAGkY23nRMRmPWzbBqRg9V7gr7n+kyOi20kbtT67nOf4iPhOlfRFpB5BtYsxbxsRlWZgFuvZE9iXNLHjl6Rh5NL3/SHggBrf96dJw0/nR8TSatuqUsdrSO/bZsCNpB4EpB+j7yLd+HR86UdSN3WMIv2QuxAozcws1fFu4GPAvhFxb5U6Ku6HyvJcHhEfqJL+b9Kx7m9HxL/L0voDx5BGDqp+Ljm/SJN/3kfquW0H/EdELJD0iYj4ca06VqjPAap3JG1cHNbK675E+pV+DfDZiKh6ky5JfwMmV8sCfCwiNu9lG/cHPgGMi4huhxsq7EzLd6RQe2f6Um7vj4BKv5wEfLzaa5E0lzQMdVv5P/aq7JBz+bWAcaQD6++t8X58Gjid9B50RcSDFfL8IiLeX6WOVf5slaZMfw3Yn/QP/xrS1PQrI+KZnGf3iKj1S737RkjDgPeTAtSGVfJNI+14q/Wub6i2s1SaKdpF6iH/lnRc64WyPBdVG07Ow1FnkXpNA0g/BN8fEf+X098KXB4RW1ep48WI6NWstLJ61iAFkfeRhrMhHXv8GWmIup4JH9sC3yf1QIojMdeTei0r/cgqK38P6fZAx0XErG7yrB8R3fZc8//uzcCJxR+IZXmuiojySV5V5WC1O+n7tQmwfURs1KM6HKBWXdkxl8/Ve8xF0pcj4qzC8hCAYmArz9PDdr0WWB84LyL2r5KvETvTO0gH86vtwL4fEW+vUce5wDuANUn/7NNJAat0x+O6d8g5qO1Hmk10AGlo71pgx4jYokq50gHzP+S/K2Whxi/9Zny2Sif2HkCahTcA+D/gqoh4tka515F6gksi4nd53RmkO51Cel9mRMTFVepYRh3HeSKi1jGTUn2lHsJjpO/nQ3WWuxs4pNSrkDQW+FoxMEraMiL+UqWOp4E7WHl4vu5TIiT9MiIOqqfN9ZA0mFeC3IPF3nKNcst7UIXRAyKi1szdinX0Vu5pbZMX7y4GZ0mjST/2ri8fmakpejHzxY/ls1LWII1l/5s0O2qDXtTxBlIX/5+8MovoKdJB+bpm0NSxjVqzq75ctjykfNvleSrUMZc01LFTlTzja9SxddnyCFIPcCJwEelkwXfWqKM/qad0EbAov5c/JO2QBuQ8+9eoY1od72k9eZry2ZJOT/hCfn2L68h/bN7u1wrr7gYuzo8/kn6BV6vjDtKv/O4eY4E7evFaNgf+mzQN+W115P99rXWkIaUef3b5+zadNIvtoBp1PJ2/79UeN/Ty8x1OOra0Vh15bywrdys1ZphWq6NKntNrpH+MNPP4kvJ2k4bKbwNe3+P3ojdvoB8BaRhsFulA64e7yXNyjTqGk2bLXEYa+tg7P7pIs4geAt5Yo45BdbT18jryrNLOtJ4ddi/e47VJY/HfIvVmXgYW1ijzNOnA8hTSMZZSUKp7ajbw4zryHN+Cz3a9wvN1gY+Qek2LScHpEtJwZa223gy8tbvPi3TMY1aNOvaqYzs185Tl3wD4KmlIeAHp2FHN7xmpt7NG4VG+rup3kcacElFxG/QsyH2GNPnmuMK68/L3/In8vuzQk3ZUahewZ406HifN4qv2eKxGHb8lHS/rLv0TwA978v2ICA/x9VaDjrlcSurdVJxIIelA4IMR0e0lkeo8NvCzqH6QdDhpJ3Yz6aTN4sHafUgn2+0WVWblVJpYUSHP6RFxQpX0dUkHWMfm
x3akgPN70vlDMygbPqhQx0dIgWkp8AvgVxHxUj3tK9TxN1LPoprDosqxsAZ9tn8gHZ84hDRUuZR0DOrnwK8jD3vWImlGROxRtm67KEz7lXR7ROxYT32rStL2pCtpfAC4DzgHuKSe19PNUGP58BxR/1Bjb4fnG3EMehowISJm5OXdST2vd0XETZL2IPV6u/3eVvj/v4GV9we1jg0+Rvq/r2afiHhjlTpuiojdq1VQT56VyjhA9U4+XlLropO1jrncHFVmTtWTpxHHBhq0M32c2peo2TuqH4hfQvoF/BtgKnBTVJnBVE0+/nYgKcA+TzqutV1ELJO0X1Q5x6wRs80a+Nn+izRb7OfANRGxuFqd3dRTz4+HlYJYo0n6EGlocgxpSO+cKDueKOmYiPhulTpq/d8JOLva/12up/yUiE9FD06JKKurt0Hu+ijMJlW60simUTj2VetzqfD/v1Kwhpr//9OqBbCc5zsRcXyV9Hq+Y9MjYmy1POV8HlTvfb30y6c7kmpdYLHiNfh6mOcuav/DTqhRx8bdBSeAiLha0krncZVno/KEgvI81WxNOp6xC+kyR5tKmkE6xlDvCYNpQ2niwMXAxXmSwHjgp5JezvV3O0mC1HPsosZssxpNaMRn+yfS8ZR/18hXy4uS3hwRf62UKGkbKp+31WhTSMdrLyadkD02T3JY3hTS9em6DVDAsXX831X9rmrFUyIOj16cEpHzrNJ5fxT2v3kCzEGkKd1Fjfj/P7tGHfX8wO02OGUvS9osujlPUNLm1N4/VNywH216kHZ+I6qkjyDNfKlWRz3HBnarkT69jjqq5qG+SQPf6eH7synp4OtE0s7tdGC/VXzP16cHB/NJx8B+SjpPZPMWf7bDGvQ92410lYRPkqb79iPtHDcFjiYFi10bsa0a7ag10WKPnnw2q9COl0jnUE0kHQMrf3wNeKhGHY04Bn1Vfv9LPbAFwGsK6WNq/V/V+f9fNQ9pmLXqBKc6trFHhe9Yv/z8CNL5mD3+jjX1i+BHzQ91F9LB8sPLPtSN87oHWrTjaMTOtEfXh+umjlFly6WrH/wv6TI5y4B/NGA7r+1FmZ7ONuuIz7bQnn3yTmJp2eNB0vGFVrSh6izOnKdHEy162Y5VDpQNCnKb5MDwPOkag+8qpJ1Lug7jJ1v1HWnA+7pvN9+xucAevanTx6DaLA9xnM/KQ04PkC4WW3U4o0Ft2IXUSziNdIJgaZLEG0kzz04iTfi4pUod95HOaen2ZL862vFH0kmYY0k7ia1znTeRZkbNiIj7e1P3qlC68sKn8mMA8ImIqHkvp074bMvaI9LEk9L5Ng8Bf4qyS/Ws7iSNj+q3waiZpxHHoFdHFb5jD5CCfa8CjQNUB8gf6tt5ZUf2IGnH0bIPpxN2pvmA73xWDEgVj5u0wqrMNivU0fbP1hqvEUHOanOAsuXavTOV9OuocsWLVmnEbDMzW3UOUGZlck/u36STbB+m8nk3H49eXiPRzOrjaeZmK6s1dRdWvjuymTWYe1BmZeo8vrBXRNS6uZ6ZrQIHKDMz60hrtLsBZmZmlThAmZlZR/IkCbMOJmlT4H9Il2haQroaxeUR8YO2NsysBdyDMutsk4HrImL3SFe5nkC6ogUAkiZLmlBvZZKmS+pqcBvNmsIByqyz7Ui6qgYAETGddKNCs9WeZ/GZdTBJ9wK3k26A96+ytP8ETiTdYXcecHFE/EjSucBI0nDgY8DREfGspG8DnybdQfVx4MyIuEbSPqSe2UvAszl/tzenNGsVByizDiZpT9LNCvsBvyTdm2pGIX0yMC8iJhTWfSEi/ic/nwD0i4hT8vL0XMfkvLwZMAcYExH3S/os6Vbly2+kZ9YuHuIz62ARcQPptgxfJt1gb5qk82sUWyzp9/lmjx8Ctq+S91BgVuEq8VOAPSV1e+djs1bxLD6zDpeH9iYBkyTtAdyYb8H9UHnefFX675LuWTUvT4joqlL9RsA2uWdV8ndgGGl40KxtHKDM
Opik8yLi06XliJgh6Sngdd0U2RG4PyLm5eUBNTbxCKkHNa6wzfVJx6LM2spDfGadbS9JO5YWcg9qGekupQDPAWtLWkfSJaT7d20haXBO36esvlL+N0s6E7gU2Cmfb1W6OeN0vG+wDuAelFlnOwM4M98CpB8pOI2PiH/n9CnAj0l3If4+cBWwP/BHSXNItxMfLemMiDgOuBA4nTTsd3xE/E3SocAUSUty/UdHxJJWvUCz7ngWn5mZdSR3483MrCM5QJmZWUdygDIzs47kAGVmZh3JAcrMzDqSA5SZmXUkBygzM+tIDlBmZtaR/h9WGy4KI/aP0QAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Apply rc settings BEFORE creating the figure: matplotlib reads rcParams when\n",
    "# figures and artists are created, so setting them afterwards mainly affects later\n",
    "# plots and makes this figure depend on how often the cell has been run.\n",
    "rcParams.update({'figure.autolayout': True})\n",
    "plt.rc('font', family='serif')\n",
    "plt.rc('xtick', labelsize=12)\n",
    "plt.rc('ytick', labelsize=12)\n",
    "\n",
    "# Bar chart of the count column per State in top20_state.\n",
    "fig, ax = plt.subplots()\n",
    "top20_state.plot(x=\"State\", y=\"count\", kind=\"bar\", fontsize=12, ax=ax)\n",
    "ax.set_xlabel(\"State\", fontsize=12)\n",
    "ax.set_ylabel(\"Accident\", fontsize=12)\n",
    "ax.set_title(\"State-wise Accident\")\n",
    "fig.tight_layout()\n",
    "\n",
    "# NOTE(review): absolute, user-specific output path -- consider a configurable plots directory.\n",
    "fig.savefig('/Users/pprusty05/google_drive/Data_Mining/Project/plots/State_vs_Accident.pdf')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Hour Vs Accident"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Add an Hour column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Extract the hour from Start_Time into a new 'Hour' column.\n",
    "df_accident = df.withColumn(colName='Hour', col=hour(df['Start_Time']))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Count accidents per hour and convert to a pandas DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count rows per Hour in Spark, then collect the aggregate into pandas.\n",
    "df_hour = df_accident.groupBy('Hour').count().toPandas()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Sort by hour"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Order the rows by ascending Hour.\n",
    "df_hour = df_hour.sort_values(by='Hour')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3debwcVZ338c8XSAibBJJL8CGGINGwOJCBGxwBZXFgWEQBUQS3yBLkUUeEYRFB2cZBJaMzjgoRQhDZBmXkAYygkrAEAkkg4LAJSIQMAglgEmUCIfk9f5zTodLpvul7b9/uSu73/XrVK13n1K/qdKdv/bpOnapSRGBmZlY267S7AWZmZrU4QZmZWSk5QZmZWSk5QZmZWSk5QZmZWSk5QZmZWSk5QVm/IWk/SXMkhaQ7JHXk8pMlzZW0UNK17W5nNUl7Srqtj9Y9SNIiSUc2aX112yrp2Pw5T27GtmztJ18HZf2JpL2BqcCAiHizUH4O8PcRsWebmlaXJAFvi4iFfbDuw4FLgakR8dEmrK/LtubPeWREjGvCtuYC4yJiWm/XZeXkIyizkouk6ckpOwz4R+BASRv3dmV93FbrZ5ygzGpQcqqkGZLuljRJ0ia57rbcTThS0gZ5mch1AyVNy/VfkHSLpMX5yK24/i9K+oukhyXtmsvmSdo+v/6ZpBckfay4/lw3Ppf9VtLNkrbL5QMkfUfSPbnN38hHNPXe4wbAAOBa4K/Ah6vqt5U0JXeH3ivp3ELdfpKmS5oq6c7czo4abe2Q9EtJ90u6HuiosY3b8jbukrR7Lv+wpMdz+bfyeqdL2iLXXw5sCXwvf967NvY/a2uUiPDkqd9MwN5AAHcA0wrTXODuwnKfBh4BNszzlwKXFeqD1FUFMDL9Ka20nQC+nl8fBexSoy0/Bb6aX48B3gROyfO7FV6vWD+wMfAysH6e/zKpmwvga8DtwLqkxHMP8KkuPosjgKPz6x8CvyjUrQs8Cnw2z28KzMuvtwEWAe/K82NJXYSrfBbAfwKX5NdvA34PTM7z6wGPAcfk+Z2ABcAmeX4cKXFuk+d/Wfm88vxcYO92f6c89d3kIyjrrz4YEXtXJmByVf1ngOsi4rU8fznwaUnrdmMbNwJExDUR8UCN+puBD+XXBwGX5H/J5TfXiFlGSn6fkbQh8APg6lw3DrgiIpZFxFLgelKireegShvzOg6Q9LY8/3fAKOCq/B4WApWBFEcDsyLiyVw3EzireuX5szqMlIiJiEXArwqLvBfYFrgy1z8M/E/hMwF4IiKeya8fJiVH6yecoMxqGw7ML8zPJx2VDOvGOlaci5E0JndFTZP0vVx8K7CrpCHAe4ALgT1zV+J2EfFE9Qoj4n+BvfL0DCmpVZLKcODkynaAT5KOhFaRk9s+wC152X/OVR8prOvVKAwkiYjphbriZ1OsK+ogHSUtKJS9Ung9nJRsf11o8/qko7WKRYXXS4CBtd6PrZ3Wa3cDzErqOVY+X9IBLAVezPNLSTtTgMGrW1lEzCF1LxbLXpV0H/ApYEFEPCfpceCzpCOJVUgaALwYEZ+StCnpyG9CjnkOuCAirs/LrtNF2z4EnBER1xXW/SPSUdKVeV2bSVqvkqTy+bG5uW50Vbt2jYjZVduYT+q27CB15QEMKdQ/ByzNR7CV9WwELK/TZutnfARlVttk4ON5IAGkBHBlRCzL88+QjnrgrW65nrgF+AZvdX3dDJxHOt9Sy1bAj2FFt9sc3jpKmgwcXeiG/CzpvFQth7JydxvAL4D9JA0G7gOeInXnIWlz0vmkN4FrgE5Jo3LdHtTo4suf1Q3kbsbcfXhwYZH7gGfzUHckrZfb8O46ba62GNhQ0j6SvtxgjK1J2n0SzJOnVk3AfqQdemWQREcuP5l0ZLAQuLaw/D8B9wJ3A5PIJ+9z3UeBp0mJ5P/mdU4jJYvb8vwMYN/VtGkH0kCAQXl+d9KO
d2Ce78jrqax/I+Cy3KY787ZG5GUHkLoJZ5Cu9bqSPMijapv/Qupq+36hbF1get7OHGAE6fzQlPxZ3QXsVVh+/7z81PwZDK/R1nVz2S+BmaTzXZcCL/DWAJJtSYmyso3KgIl9gceBPwPnkkYYzs2xJ+dlvkgayDED2LHd3y9PzZ98oa6ZmZWSu/jMzKyUnKDMzKyUnKDMzKyUnKDMzKyUfB1UNw0dOjRGjhzZ7maYma01Zs+evSAiOqrLnaC6aeTIkcyaNavdzTAzW2tI+mOtcnfxmZlZKTlBmZlZKTlBmZlZKfkclJlZiyxdupR58+axZMmSdjelLQYNGsTw4cMZMGBAQ8s7QZmZtci8efPYZJNNGDlyJF087HitFBG8/PLLzJs3j222aeyxXu7iMzNrkSVLljBkyJB+l5wAJDFkyJBuHT06QZmZtVB/TE4V3X3v7uIzM2uTkWfc0tT1zb3w4NUvtAZxgjKj6x3F2vZHb9ZX5s6dy5w5czj00EObsj538ZmZWVPMnTuXX/ziF01bnxOUmVk/ct5553HWWWdx/vnnc8QRR/D8889z3HHH8e1vf5vjjz+e6dOns2TJEsaNG8e4ceMAmDhxIpV7kN50002MGjWKr3zlKxx33HHsvvvuzJ07lzfeeIPJkyczZ84czjnnHB5//PFet9UJysysn7j11luZMWMGF1xwAWeffTYHHnggp5xyCgcccACnnXYaF1xwAUceeSTrr7/+iuQEMH78+BWvDznkEPbcc0+22247Lr30Ug477DB+/vOfM3DgQMaNG8eYMWM455xz2G677XrdXicoM7N+4uGHH2bUqFEr5o899lgefvhh3vnOdwIwbNgwFi5cyIIFC1a7rne/+90AdHR0sHjx4j5prxOUmVk/sfPOO/P000+vmJ80adJKZS+88AKDBw9m6NChbLLJJixatAiAZ599dpV11Royvu666xIRLFmyhCeeeKLX7fUoPjOzNmn1CNH999+fGTNm8NWvfpVBgwYxZMgQLrroIr72ta/x5JNP8tRTT3HttdciiTFjxrB8+XLOP/98Ro4cycKFC7nmmmvYdtttefjhh7nyyisZPXo0N910E6+++ipPPfUUO+64I/PmzeOUU07hQx/6EKNHj+5VexURTXrr/UNnZ2f4eVBrHw8zt1Z47LHH2H777dvdjLaq9RlImh0RndXL+gjK1ipONGZrD5+DMjOzUnKCMjNrof58WqW7790JysysRQYNGsTLL7/cL5NU5XEbgwYNajjG56DMzFpk+PDhzJs3j/nz57e7KW1ReWBho5ygzMxaZMCAAQ0/rM/cxWdmZiXlBGVmZqXkBGVmZqXkBGVmZqXkBGVmZqXkBGVmZqXkBGVmZqXUkgQlaYCkkyRNk3SHpHslfbBQ/0Kuq0wXVMWfIml2nk6tqhspaaqku3LsNs2KNTOz9mnVhbpbAV8GxkTEQkn7ATdKGh0R/wP8KiLG1QqUdABwPDAmF82R9GhEVG5bfQ0wMSIul/Q54Dpgt97GmplZe7Wqi28x8PWIWAgQEb8GlgC7NxB7AnB1RCyJiCXAVcDnASTtTEo+V+VlrwL+RtKuTYg1M7M2akmCioiXI+LKyrzSs4IHApUbUm0vaYqkuyVNlDS0ED4WeLww/yjQWah7JiLeyNt5A3iqqr6nsWZm1kbtGiSxF/BH4M48/whwJPB+YCEwRW898H5YLqv4M7BFnbrV1XcndgVJ4yXNkjSrv97k0cys1VqeoCQNAr4JjIuI5QARcUxELIp0D/pzgF1Y+VxQV/emr1Wn1dQ3Gktu38SI6IyIzo6Oji5WZ2ZmzdLSBJWPiiYC342I2bWWiYi/Aq8AW+eil4DBhUUG81bXYHVdpf6lJsSamVkbtfpxGxOA+yPieknrk7rZRgGLImIWgKSBwGbA8zlmJjC6sI4dclmlbhtJAyPijRy7bVV9T2PNzKyNWnYEJel0UkKcLGljUjI4BhgBnFg45/Ql4Gngvjx/MXCU
pEG5e/DoXEZEzAEeAo7Kyx4FPFI4OutNrJmZtVFLjqAkvRu4MM9+qVB1LnA78AHgrpykFgOHRMRSgIj4laQdgek5ZlLhOiaATwCTJB0HLAM+XqnoTayZmbVXSxJURPyeGoMPCo5ZTfwEUvdgrbq5wL59EWtmZu3je/GZmVkpOUGZmVkpOUGZmVkpOUGZmVkpOUGZmVkpOUGZmVkpOUGZmVkpOUGZmVkpOUGZmVkpOUGZmVkpOUGZmVkpOUGZmVkpOUGZmVkptfqBhWZmpTPyjFvq1s298OAWtsSKfARlZmal5ARlZmal5ARlZmal5ARlZmal5ARlZmal5FF8ZrZW6GokHng03prIR1BmZlZKTlBmZlZKTlBmZlZKTlBmZlZKLUlQkgZIOknSNEl3SLpX0gcL9WNy2XRJN0kaUqiTpO9ImilptqRPV627T2LNzKy9WnUEtRXwZeAjEbEX8HXgRklbSRoI3AicGRF7AA8AFxdiTwB2Bd4L/ANwkaSdAPo41szM2qhVCWox8PWIWAgQEb8GlgC7AwcCyyJial72UuBwSR15/gRgckQsj4gFwM3A8bmuL2PNzKyNWpKgIuLliLiyMi9JwEBgPjAWeLyw7HPAa8AuktYHdirWA48Cnfl1n8T25r2amVlztGuQxF7AH4E7gWHAwqr6PwNbAENJbVxYo44+jF2JpPGSZkmaNX/+/NW9NzMza4KWJyhJg4BvAuMiYnkujlqLFl5X13dV18zYtFDExIjojIjOjg73AJqZtUJLE1Tu2psIfDciZufil4DBVYsOzuULgOVV9ZW6vow1M7M2a/UR1ATg/oi4XtL6kkYAM4HRlQUkvQPYEJgdEa8DvyvWAzvkGPoqtgnv08zMeqllCUrS6aSb006WtDGwLXAMMAVYT9JeedFjgBsionKy52JgXL6maQhwMGnEHX0ca2ZmbdSSu5lLejdwYZ79UqHq3Ih4XdKhwA8lLQNeBcYVlrmElMzuJyXUUyPiIYA+jjUzszZqSYKKiN9TY/BBof5B4H116gI4tdWxZmbWXr4Xn5mZlZITlJmZlZITlJmZlVKPE5SkrZrZEDMzs6KGEpSk62oUn16n3MzMrNcaPYJa5f4+EfGPwPDmNsfMzCzpcpi5pGdI96vbUtIfqqo3BP67rxpmZmb92+qugxpHun7pu8BJVXWLgYf6oE1mZmZdJ6iIuANA0pH5YtuVSNqOlZ+3ZGZm1hQN3UkiIn4vaUtgJOlBgxXfww/4s35s5Bm31K2be+HBLWzJ2sOfqVU0lKAk/TPplkEvkx7VXjGsLxplZmbW6L34jgBGRMQLxUJJP2h+k8zMzBofZv5QdXLKftLMxpiZmVU0egT1uKSbgFuBRYXyM0gPATQzM2uqRhPUl4A5wEeryrdsbnPMzMySRhPU1RHxhepCSd9qcnvMPIrLzIDGh5mvSE6SNo+IV3L56X3VMDOzsvOPqb7V6M1iN5b0Y0mvATMlDZF0j6Rt+7h9ZmbWTzU6iu9i4BVgD+D5iHgZ+Bzw/b5qmJmZ9W+NnoPaKiI+BSBpKUBEPCFpYNdhZmZmPdPoEdQgSZsXCyQNBjZqfpPMzMwaT1A/Il0LdRmwjaTvAw/gLj4zM+sjDSWoiPgJcBiwDHiMdMPYT0bE1X3YNjMz68caPQdFREwHphfLJHVGxKymt8psLefhyWarVzdBSfpAA/HdetyGpLHANcAFETG5UP4CKz9X6u6IOKtQfwpwdJ69NiK+U6gbCVxOei/LgM9FxDPNiDUzs/bp6gjqVqByg9gAhgP/SxpuvjmwMfDHRjck6TDgY8DCGtW/iohxdeIOAI4HxuSiOZIejYjKT9BrgIkRcbmkzwHXAbv1NtasjHzkZf1JV+egboqIbSJiG+DHwEcjYtNctilwONCdc1AzI+Jo0qPiu+ME0q2WlkTEEuAq4PMAknYmJZ+r8rJXAX8jadcmxJqZWRvVTVAR8fHC7H4RcVNV/Y3Ano1uKCLmdVG9vaQp
ku6WNFHS0ELdWFbu/nsU6CzUPRMRb+RtvAE8VVXf01gzM2ujRgdJDJG0W0TcXymQ9D5SV18zPAKcRDq6+jYwJW8vSE/tLXYL/hnYIr+urltdfXdiV5A0HhgPMGLEiIbflFl/5u5I661GE9QZwG/zYIYFQAdpR/7xLqMaFBHHVF5LOof0zKndgPsqi3QVXqNMq6lvNLbSvonARIDOzs6u1mdmZk3S6N3Mp0jaBjiY9AyoPwG35HvyNVVE/FXSK8DWpAT1EjC4sMhgYH5+XV1XqX+pTn13Ys3MrI26cx3UAuCKYpmkr0bEv/SmAZL2BRZVrqfK9/fbDHg+LzITGF0I2SGXVeq2kTQwIt7IsdtW1fc01szM2qjuIAlJn5K0Tn49qdZEetJub40ATpRU6Vr7EvA0b3XvXQwcJWmQpEGka5ouBoiIOcBDwFF52aOARyJidhNizcysjbo6gtoDuAF4DdgHmFxjmdcb3VAevj2BNLT7DEkfjojDgduBDwB35SS1GDgkIip3Tf+VpB156y4WkwrXMQF8Apgk6TjSxbYrzov1JtbMzNqrboKKiBMLs2dHxE+rl5H0dKMbykcme9cofxY4ZpWAlZeZQEputermAvv2RayZeTSetU+jdzOfLukzkrYGkLS1pENqJS0zM7NmaHSQxIXAi8BteX4xcEi+VunsPmmZmTWVj4RsTdNoghoWEUdWZiLiFWC8pDv7pllmZtbfNdrFN6C6II/wa3iYupmZWXc0mmDulHQrcCXpQtcO4NPAtD5ql5mZ9XONJqizgNOBr5Meu/Ecadj5d7qIMTMz67FGb3W0DPhmnszMzPpcQ+egJO2b7x4xNs/vJGmCpI37tnlmZtZfNTpI4kzgZuCBPP8IMBuY1BeNMjMzazRBERE35K4+ImJZRFwNDF1NmJmZWY80mqA2krTSwwnz/KDmN8nMzKzxUXz/ATwi6RbeGmZ+EPBPfdUwMzPr3xo6goqIq4CPAsuBnYE38/zufdc0MzPrz7rzwMJ7gHvyc5UOAk4CDgG+2EdtMzOzfqzRYeYDJH1Y0lWkm8ZeRnp+0vNdR5qZmfVM3SMoSesB/0B6iN+hpG69G4A/ALtFxFJJB7WklWZm1u901cU3P9ffRLrv3pSclG4vPO32ly1oo5mZ9UNddfF9EagkoPUA9X1zzMzMkq4e+X4VcJWktwEfASZJ+guwuaR1ImK5pAMjYkqrGmtmtjbwwyMbs9pRfBGxiPSYjSslbUo6H/VTSW+ShpmP6tsmmplZf9StBw5GxELgCuAKSZsBt/dJq8zMrN9r+F581SLiVWCvJrbFzMxshR4nKFjR/WdmZtZ0vUpQZmZmfcUJyszMSqmlCUrSWElPSRpXVT5G0r2Spku6SdKQQp0kfUfSTEmzJX26FbFmZtZeLUtQkg4DvgIsrCofCNwInBkRe5Ce2ntxYZETgF2B95JuvXSRpJ1aEGtmZm3UyiOomRFxNLC4qvxAYFlETM3zlwKHS+rI8ycAkyNieUQsID16/vgWxJqZWRu1LEFFxLw6VWOBxwvLPQe8BuwiaX1gp2I98CjQ2Zex3X1vZmbWfGUYJDGMqm4/4M/AFsBQUhsX1qjry9iVSBovaZakWfPnz2/gLZmZWW+VIUEBRI0ydVHfVV0zY9NCERMjojMiOjs63ANoZtYKZUhQLwGDq8oG5/IFpMfMD65R15exZmbWZmVIUDOB0ZUZSe8ANgRmR8TrwO+K9cAOOabPYpvyrszMrFfKkKCmAOtJqtzX7xjghoionOy5GBiXr2kaAhxMGnHX17FmZtZG3bqbeW9I2hWYAIwBzpD04Yg4PCJel3Qo8ENJy4BXgXGF0EuAbYH7SQn11Ih4CKCPY83MrI1alqAiYjawd526B4H31akL4NQu1tsnsWZm1l5l6OIzMzNbhROUmZmVkhOUmZmVkhOUmZmVUssGSVj/M/KMW+rWzb3w4Ba2xMzWRD6CMjOzUvIRlJnZGqK/9Ur4CMrMzErJ
CcrMzErJCcrMzErJCcrMzErJCcrMzErJCcrMzErJCcrMzErJCcrMzErJCcrMzErJCcrMzErJCcrMzErJCcrMzErJCcrMzErJCcrMzErJCcrMzErJCcrMzErJDyw0M+sH1sSHHZbiCErSZEnTqqaNC/VjJN0rabqkmyQNKdRJ0nckzZQ0W9Knq9bd41gzM2uf0hxBRcTetcolDQRuBMZFxFRJ5wIXAx/Li5wA7Aq8F9gceETSQxHxcG9i++RNmplZw0qToLpwILAsIqbm+UuBuZI6ImI+Kcl8NyKWAwsk3QwcD3ypl7GWrYldA2a25itFFx+ApIsl3SVpiqR9ClVjgccrMxHxHPAasIuk9YGdivXAo0BnE2LNzKyNynIE9Rhwe0TMlDQWmCppz4iYAwwDFlYt/2dgC2AoKckurFFHL2NXkDQeGA8wYsSIbr85MzPrvlIkqIj4VuH1TEm3kLrfTqwU1whTcRXdqOtObKVNE4GJAJ2dnbXWZ2a2VmpnF39puviqPAtsnV+/BAyuqh+cyxcAy6vqK3W9jTUzszYqRYKSdFpV0TDg+fx6JjC6sOw7gA2B2RHxOvC7Yj2wQ47pbayZmbVRKRIUcLKkLQAkbQN8BLgy100B1pO0V54/Brghj8KDNGx8XL6maQhwMGm0Xm9jzcysjUpxDgq4CPgvSW8CGwFfjIg7ACLidUmHAj+UtAx4FRhXiL0E2Ba4n5RwT42Ih3oba2Zm7VWKBBURF5GSVL36B4H31akL4NS+iDUzs/YpSxefmZnZSpygzMyslJygzMyslJygzMyslJygzMyslJygzMyslJygzMyslJygzMyslJygzMyslJygzMyslEpxqyMzM1u7NOM5Uj6CMjOzUvIRVC+182mTZmZrMx9BmZlZKTlBmZlZKbmLbw3TVZcidN2t6O5IM1uTOEG1iZOFmVnX3MVnZmal5ARlZmal5ARlZmal5ARlZmal5ARlZmal5ARlZmal5ARlZmal1O8TlKRBkiZLmiFplqT9290mMzPzhboA5wCKiL+T9G5ghqTtI+LFNrfLzKxf69dHUJLWAY4DLgOIiN8DDwKfame7zMysnyco4J3AEODxQtmjQGd7mmNmZhX9PUENy/8uLJT9GdiiDW0xM7MCRUS729A2kvYA7gbWj4g3ctn5wB4RsW9hufHA+Dw7Gniii9UOBRb0oDlre1w7trmmxLVjm36P5YlrxzbLFrd1RHSsUhoR/XYC3gUEsEWh7PvAtb1Y5yzHlWOba0rcmtRWv0d/Nq2M6+9dfE8Dr5COiip2AGa2pzlmZlbRrxNURCwHfgwcAyDpXcAY4Kp2tsvMzPp5gsrOASRpBnANcFREvNCL9U10XGm2uabEtWObfo/liWvHNteIuH49SMLMzMrLR1BmZlZKTlBmZlZKTlBmZlZKvllsD0nqIN2zr5N05wkBL5GGqF8VES81eXtvB3YH5kfEnYXyd5BGIUZEnFcnthMYCdwWEYvy/D8CGwG/iYgfdaMd/xURh0k6KyIu6Pk7WmW97we2jIjr8/zHSe9rOPA8MDkirm7W9grb3R/4W2BGRNwhaQxwKvA24LfAv+fRntVxuwHHkkZ9bgYsAf4I3A5cGhGLm91W698kDQF2YeX9zeyIeLmtDetDHiTRA5L2A64l3Vj2UdLtkQAGk66j2hn4RET8tknb2xe4kbQTHAjMBT4bEXMkDQRGAb+LiHVrxH4R+C7wKrAY+ATw/4BbgL8Ch5J2qOcXYj7QRXO+B5wEfC8idqmxvQ0j4rXC/E7ACcDWpETzk4i4u0bcQ8C/RsQV+c4dZwFXAvOADuBIYFJETKgR+wvg58ANEfHXLtpeHXcacDrwJOn/7WTgFODXwFLgw8DPI+KMqrhxwL/l5V4EtgQ+SLpkYTNgD+CwiCje47F62/1uZwMg6YSIuERSZ0TMand71gSSNgMuAQ4HXmPl/c0GwA3A5yPi1Ra1ZwDwdoCIeLaB5QXsQ/pBfF+3frz3
9Mrn/jwB9wPv6qL+XcD9vVj/z6rmpwN/X5jfO5cdnuc3ApbXWddDwKj8+qPAM8CYQn0H8GBVzBukZPaHvHxxeq3yb53t3V7VzsWkZP5t4ApSojy6Rtxdhdd3kI6mivUbAffW2eZ/kxLn08BPgX8g//hazec8G9g8v34n8DIwvFC/CSlp1PpM31FVtj1wdX69A/DLOtvcDPhP4E1gEfBsnhaRkuJ1wGYt+h4PAEYAIxpcXsC+wCEU7r7SzW3eXv09Wc3yY4EjSL0H69ZZZh1gvarvyieAfwKOBt5WJ+5QYINefH7DeOtHvoD98t/YVg3Evg84ETiT9MPoo0BHnWVvJF0OM7xG3TuAbwA39vA9HFenfCgp8S3K+4FTKp9/3uY/A8tqxO0MPJL/7icAg4B7ST+uF+Zpr4bb16wve3+a6u0oG1kG+EwD06NVMdNqrGcQaYd/Yv6DXOXLkpf7bdX8IzWWmVY1/y7SNWETgf9TVVfZwUyts72phde3AJ1V9dtQI3kDd1dvo8YyNW+XUtlm3lHtnz+X3wP/CvxtF/9Hd1bNP1VjmTtWF1fnPdRbpqU7m3bsaEg/YP5QZ3qt8m+NuPMKr4eQfgi+STq6XAo8Rv6xVRU3hXT0DLBjXn4ecF/e1kvALjXiFpB+1FwB/D0N/KjJcbsB/wMsA54i9WDcnj+Tl4G/AAfUiR0F/A5YTurBWA68TvqR9QrwHxSSbY65q4E21fy+NRBX72/talIPxsGk5Hk56Yfj0Fy/MTV+FOdlzgEOIv0w/VmeXyfXH1n8O1lt+3rypvr7lP+zLgf2zDuAAaTzeUNJ3TuXkbqjasU+STpPNbWL6ZWqmKnAxnXWN4HU3VQvQf0GGFKYf09V/QbAPXViO0lJ5lvApsUvNDC2TkzxCKreTnpajbJjSXfwGE369Xst6VfzqPzvZaTuwS63WfW+PgHcROqKPaNWHHBe/mP6UZ6/IP8/bgp8vk5brwfOBrYC1icdff07cFmuH0Lqcq3V1pbubNqxowHOJSWIM4G9CtPe+f9iL6qO2mt8dy4mHXVvmOcHkJLrKkemwJzC6xuBT1bV70ONH1Skv6v18mdzNelHzbeBnVbzOd8FfA7YDjiNlMiPK9QfROrKqhU7FfgK+VO6WZUAAAbkSURBVO+ZdK7zG6SjvUGkru0JVTH3AB/ooj3vp8bfMCnxLetiWk79/Uat9e0OTCN119f8UQxMr/obnN+Tv4EVy/bkD6G/T6Sd0oXA/Br/6fOBb5LukF4rdixpEEVX659aNX8S6ZfeJ+ssf3qtnUyu+1LeKbynRt3hue7c1bTngPyHdfrqvlxVO5kfAbtW1Q+l/o7tY8DDpF/LbxY+08XADyo7q662Wad+CHBijfLtSL+y/0LaCW9MOq9U2e6z1P7lvQVpAMXywrK3krvmgJ8A36zTlpbubNq1o8mf0fdJiW3H6v8r4Oyuvvf11l3ru8PK3cPT68Stsr7q703+LD4N/Ap4gJQ0/k+NuGlV84/VWGaVI+9cXq9n5c7C6+lVdWNI552fAm4m/ZD7aX79JFXd9oW400g/wLauM43soj2/rVO+df7ufKDO9+Y3VfOnru7z62pqaCFPdT681O+8bf6Dr/zab+Tcx6G1vviF+l2q5tfLX4zNu4ipe06si5jN83prJtMa7/WTwMWrWe5yYFKeLi9+QUmj4/4EnLyadXSQEvmepC6bgatZ/k/AfzTx/3U7YKcGtrsVqbun7v9ljZhGdjY714jr0c6m3Tua/PdxVf4ujKCLHxOs/OPmFmCjGsuscnRJGpxyQH7978AhVfUfqPPZdNWWLUgjXWsl+Acrf2/A35EG/xST8DDggTrrvRt4Z1XZbsCvC/O1usAHkAbtnE/qep9IOtr/SFffU1L3ZVf7mjPrlF8GXARsU6NuaP7u1PreXJy3+fYadW8Hvgr8Z6N/Lx7FZ9ZieRTUgaQkXHloZuUShSmRn01WI+4K4KsR8Xyd+jMj4ptVZZeRBqb8
ICKeqaobQhr9+P6oGgEq6WLSkdMZEfGnqrq3A+NIv9qPXP07Bkm7krr+to2I7ess8yfS+SSRBp38d0QcV4g/EaBSVoh7GykJDiUN9T+MdPT7IinRbEJKWrOq4h4EvhAR9zTyHgpxx5J2xK8C65IGcVxL+v9bSurCPDci/q1G7AGkQTL3kc6BbQXsChwU6TKHH5B6O/aqilvdZSbLozASt7ckbU36P54dETfXqN+I9F08q6p8GPAeUoJ+taruYNJ7/VlEPNpQO5ygzNZefbij6STtaB7pZns2joi/dO9dgKR9eOsI7Lk6y+xCGu4/ktQN/yowhzToZJVtSvos8HpEXNuD9ryHdKQ9PSL+JGk70uc8kHT0+csuYkeRzuFtRTpXd3VEzM11I4G/RsT8wvK9ucykx4mtp9de9uaazVXW5QRlVi6SfhYRR7S7HX2pP7zHZpE0HfhGRPwmz+9NGn35rxHx8/wjY3FErFMV15vE1qPY3myzFt9JwqyFJH2mgcV26OG6e7TTb3Zcf3iPfRVXJ3ZpJTkBRMQ0SR8ELpG0BWlQTq0jjfNJF4wXE9sPJE2IiBsk/ZHUnVpLT2N7s81VOEGZtdbZpDsBdNXNtWV1QU93+q2Oy9bq99ibz6aHsVHdNRoRS4DPSppAGjVcS1eJbRj1E1tvYnuzzVW4i8+shSSNBU6KiE92sczUiNinquxJVr/T3zkiNm9nXI5dq99jLz+bbsdKOol0ucjXI2KVp31LOh34lxpdfFNJg0NqnX+bQDpA+WKdLr4exfZmmzU1OtzPkydPzZno5mUGuazb18+1I64/vMfefDY9iaWHl5nQu+snexTbm23WmnwEZbaGkHQo6RqZesPMd4mIB9od1xtrynvszWfTqs9V0nqkkYKLI+KVOsu8KyKebFZsb7ZZc1knKDMzKyM/sNDMzErJCcrMzErJCcqsxCTtJ2mOpJB0h9KTnJF0sqS5khZK6vadEMzWBD4HZVZy+WLHqcCAiHizUH4O6UGWe7apaWZ9ykdQZmZWSk5QZmsBJadKmiHpbkmTJG2S627LXYQjJW2Ql4lcN1DStFz/BUm3SFqcj9rM2sq3OjJbc/y2kliykaS7YQN8inQ37bER8ZqkS4HvAcdGxP6VuIj4X0mfID13ikiP9tg71w+JiIMlHUV6RLxZWzlBma05PljrHFSe/QxwXUS8lucvB6ZKGh8Ryxpc/40AEXFNk9pr1ivu4jNbOwwH5hfm55Oewjqs9uI1LWxqi8x6yQnKbO3wHNBRmO8gPd31xTy/lPQQP4DBLWyXWY85QZmtHSYDH5e0QZ7/LHBloXvvGdITcgEOanHbzHrECcqsxCTtRxrsAGmQxIoLdUmDIv5G0rURcTUpSd0u6W7S+eWTCqs6E/i2pF+SHvdAHr23rqTb8jLXKj0R1awUfKGumZmVko+gzMyslJygzMyslJygzMyslJygzMyslJygzMyslJygzMyslJygzMyslJygzMyslP4/CJRdBTYLC7AAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Set rc options before the figure exists so they apply to this plot on a fresh kernel\n",
    "# (rcParams are read when figures and artists are created).\n",
    "rcParams.update({'figure.autolayout': True})\n",
    "plt.rc('xtick', labelsize=12)\n",
    "plt.rc('ytick', labelsize=12)\n",
    "\n",
    "# Bar chart of the count column per Hour in df_hour.\n",
    "fig, ax = plt.subplots()\n",
    "df_hour.plot(x=\"Hour\", y=\"count\", kind=\"bar\", ax=ax)\n",
    "ax.set_xlabel(\"Hour\", fontsize=12)\n",
    "ax.set_ylabel(\"Accident\", fontsize=12)\n",
    "ax.set_title(\"Hour-wise Accident\")\n",
    "fig.tight_layout()\n",
    "\n",
    "# NOTE(review): absolute, user-specific output path -- consider a configurable plots directory.\n",
    "fig.savefig('/Users/pprusty05/google_drive/Data_Mining/Project/plots/Hour_vs_Accident.pdf')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Road Vs Accident"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the boolean road-feature flag columns.\n",
    "df_road = df.select('Bump', 'Crossing', 'Give_Way', 'Junction', 'No_Exit')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert the Spark selection to pandas only once: after the first run df_road is\n",
    "# already a pandas DataFrame (which has no .toPandas()), so guard the conversion to\n",
    "# keep this cell re-runnable.\n",
    "# NOTE(review): this collects every selected row to the driver; consider aggregating in Spark instead.\n",
    "if not isinstance(df_road, pd.DataFrame):\n",
    "    df_road = df_road.toPandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Bump</th>\n",
       "      <th>Crossing</th>\n",
       "      <th>Give_Way</th>\n",
       "      <th>Junction</th>\n",
       "      <th>No_Exit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2974330</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2974331</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2974332</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2974333</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2974334</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2974335 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          Bump  Crossing  Give_Way  Junction  No_Exit\n",
       "0        False      True     False     False    False\n",
       "1        False     False     False      True    False\n",
       "2        False     False     False      True    False\n",
       "3        False     False     False     False    False\n",
       "4        False     False     False     False    False\n",
       "...        ...       ...       ...       ...      ...\n",
       "2974330  False     False     False     False    False\n",
       "2974331  False     False     False     False    False\n",
       "2974332  False     False     False      True    False\n",
       "2974333  False     False     False     False    False\n",
       "2974334  False     False     False     False    False\n",
       "\n",
       "[2974335 rows x 5 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the pandas frame (the notebook renders a truncated view plus its dimensions).\n",
    "df_road"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          Bump  Crossing  Give_Way  Junction  No_Exit\n",
      "False  2973881   2766745   2966708   2735888  2970940\n",
      "True       454    207590      7627    238447     3395\n"
     ]
    }
   ],
   "source": [
    "# Per-column tally of False/True values. Series.value_counts replaces the\n",
    "# top-level pd.value_counts helper, which newer pandas releases deprecate.\n",
    "df1 = df_road.apply(pd.Series.value_counts)\n",
    "print(df1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3de7xmc93/8dcbw8iZmdyYYSZRIacmJJWckg7olihMkskvxO9WN/K7U6Hc962Skm4yOeQQSsadQujgbEiEZIhmxmDMOBZyeP/+WN/dXLPn2nuumdl7r7Vnv5+Px3rstT5rXWt9rou5Ptf6ru/6LtkmIiKiaZaoO4GIiIh2UqAiIqKRUqAiIqKRUqAiIqKRUqAiIqKRUqAiIqKRUqAiOiBpjCRLWqruXAYbSd+X9B+9rLekNw5kTjE4pEDFoCbpYUkvSHpe0mOSzpK0fI35/FrSp+s6fl+R9OVSOLZc1H3ZPsj2cX2RV2/Kf/vj+/s4MXBSoGJx8CHbywObApsBR9ecz6AmScB+wOzyN6IWKVCx2LD9GHAlVaECQNKHJd0j6elydvOWlnVHSXpQ0nOS7pW0e8u6JSWdJOlJSQ8BH5jf8SWdALwL+G45o/uupFMlfaPbdpMk/d8y/7Cko8vxn5L0Q0nDW7b9oKQ7S/43Stq4h2OfJumkbrHLJP1bmT9S0vTyXu+XtH0vb+VdwBrA54C9JC3dss9lJX1D0iOSnpF0vaRly7ptSo5PS5oq6ZMlPteZjaQvSJoh6VFJn+qW8zLlc/+rpMdL82DX/reVNE3SEZKeKPvYv6ybAHwC+Pfy2V/ey/uLwcJ2pkyDdgIeBnYo86OAu4Fvl+X1gb8BOwLDgH8HpgBLl/UfBdak+qH2sbLtGmXdQcCfgNHAqsB1gIGl5pPPr4FPtyxvATwKLFGWRwB/B1Zvyf+PLce5ATi+rNsMeALYElgSGF+2X6bNcd8NTAVUllcBXijv701l3Zpl3Rhg3V7ew5nAReUzmwX8a8u6U8t7XKvktDWwDLAO8Bywd3ndasCm5TVntbynnYHHgY2A5YDzy+f6xrL+W8Ck8lmsAFwOfL2s2xZ4BfhqOcYu5bNcpftxMi0eU+0JZMq0KFP5wn6+fDkauAZYuaz7D+Cilm2XAKYD2/awrzuBXcv8tcBBLet2WpgCVWL3ATuW+UOAK7rl33qcXYAHy/xpwHHd9nU/8J42xxXwV+DdZflA4Noy/0aqQrcDMGw++b8OeBbYrSz/D3BZy+f3ArBJm9cdDVzawz5bC9RE4MSWdet3FajyHv5GS/EE3gH8pcxvW46/VMv6J4Ctuh8n0+IxpYkvFge72V6B6gvszVRnKVCdPTzStZHt16jOJNYCkLRfS/PZ01S/6ltfO7XlGI+w8M4G9inz+wDndlvf/Thrlvl1gCO68is5jm5Z/0+uvqEvpDqDAfg4cF5ZNwU4HPgy8ISkCyXNs49id6qzlCvK8nnA+yWNpPpshgMPtnnd6B7i3fX2uY6kKpC3t7zfX5Z4l1m2X2lZ/jtQW6eY6F8pULHYsP0bql/RXddiHqX6kgf+efF/NDBd0jrAGVRnNKvZXpmqqU1l8xll2y5rd5pGm9iPgF0lbQK8BfhZt/Xdj/NomZ8KnGB75ZbpdbYv6OHYFwB7lPe2JfCTfyZln297G6rPw8B/9rCP8VRf+H+V9BhwMVVz2seBJ4EXgXXbvG5qD/Huevtcn6Q6Q9qw5f2u5KoDTCfyaIbFTApULG5OBnYsxeAi4AOStpc0DDgCeAm4ker6h4GZAOVi+0Yt+7kI+JykUZJWAY7q8PiPA29oDdieBtxGdeb0E9svdHvNweU4qwLHAD8u8TOAgyRtqcpykj4gaYV2B7b9e6ov+R8AV9p+ury3N0naTtIyVAXmBeC17q+XtBawPfBBqo4mmwKbUBWz/coZ6ETgm5LWLB1J3lH2ex6wg6Q9JS0l
aTVJm3Y/BtXn+klJG0h6HXBsS/6vlff8LUmv78pJ0vvavd825vnsY3BLgYrFiu2ZwDnAl2zfT9Wk9h2qL+4PUXVJ/4fte4FvADdRfbG9laqDQpczqHoE/gG4A/hphyl8m+os5ilJp7TEzy7H6N68B1VHgauAh6iayY4v72Uy1bWk7wJPUXXw+OR8jn8+1bWm81tiywAnUn0GjwGvp31X/H2BO21fZfuxrgk4BdhY0kbA56k6otxG1Q39P6k6gPyV6vrZESV+J1Vxm4vtX1D9iLi2vJ9ru21yZInfLOlZ4FdUnTw6cSawQWke7H6WGoNQV4+fiOhHkt5N1dS3jlv+0Ul6mKpTxa/qyi2iqXIGFdHPSvPiYcAPnF+EER0bkAIlabSk68rNiPdIOqzEv1xuHryzTLu0vOZoSVPKTYXva4nvXGJTJB3VEh8r6ZYS/3HXzYXlxr8fl/gtksYMxHuOxVe5EbTd9K42274FeJrqxteTBzzZiEFsQJr4JK1BdQPkHeUC7+3AbsCewPO2u98BvwFVj6QtqLql/orqfgmAP1PdeNl14Xlv2/dKugj4qe0LJX0f+IPt0yR9FtjY9kGS9gJ2t/2xfn/TERGxSAbkDMr2DNt3lPnnqG5cXKuXl+wKXGj7Jdt/obpoukWZpth+yPY/qO772LV0H94OuKS8/myqAti1r7PL/CXA9mX7iIhosAF/dEBpYtsMuAV4J3CIpP2AycARtp+iKl43t7xsGnMK2tRu8S2phlV5uuUGvtbt1+p6je1XJD1Ttn+ypxxHjBjhMWPGLNwbjIiIBXL77bc/aXtk9/iAFihVj0H4CXC47WclnQYcR3U/ynFU3X4/1csu+jO3CcAEgLXXXpvJkyfXkUZExJAjqe1ILQPWi6/0ZPoJcJ7tnwLYftz2qy036G1RNp/O3HebjyqxnuKzgJU152FyXfG59lXWr1S2n4vt022Psz1u5Mh5CnlERAywgerFJ6qb6O6z/c2W+Botm+1ONdQMVKMZ71V64I0F1gNupeoUsV7psbc0sBcwqXTdvQ7Yo7x+PHBZy77Gl/k9qAbQTFffiIiGG6gmvndS3aV+t6Q7S+yLwN5lOBRTjer8GQDb95ReefdSDVx5sO1XASQdQnWH/5LARNv3lP0dCVyo6rkzv6cqiJS/50qaQnWH+179+UYjIqJvZCSJNsaNG+dcg4qIweTll19m2rRpvPjii3Wn0qPhw4czatQohg0bNldc0u22x3XffsB78UVERN+bNm0aK6ywAmPGjKGJd9LYZtasWUybNo2xY8d29JoMdRQRsRh48cUXWW211RpZnAAksdpqqy3QGV4KVETEYqKpxanLguaXJr6IaKwxR/287hQAePjED9SdwpCUAhURsRjq6+JeR5FOE19ERDRSClRERPSZc845h4033phNNtmEfffdd5H2lSa+iIjoE/fccw/HH388N954IyNGjGD27NmLtL+cQUVERJ+49tpr+ehHP8qIESMAWHXVVRdpfylQERHRSClQERHRJ7bbbjsuvvhiZs2qHhixqE18uQYVEbEYqqNb+IYbbsgxxxzDe97zHpZcckk222wzzjrrrIXeXwpURET0mfHjxzN+/Pj5b9iBNPFFREQjpUBFREQjpUBFRCwmmv58vwXNLwUqImIxMHz4cGbNmtXYItX1PKjhw4d3/Jp0koiIWAyMGjWKadOmMXPmzLpT6VHXE3U7lQIVEbEYGDZsWMdPqh0s0sQXERGNlAIVERGNlAIVERGNlAIVERGNlAIVERGNlAIVERGNlG7mMaDGHPXzulP4pzpGe46IzuUMKiIiGmlACpSk0ZKuk3SvpHskHVbiq0q6WtID5e8qJS5Jp0iaIukuSZu37Gt82f4BSeNb4m+TdHd5zSmS1NsxIiKi2QbqDOoV4AjbGwBbAQdL2gA4CrjG9nrANWUZ4P3AemWaAJwGVbEBjgW2BLYAjm0pOKcBB7a8bucS7+kYERHRYANSoGzPsH1H
mX8OuA9YC9gVOLtsdjawW5nfFTjHlZuBlSWtAbwPuNr2bNtPAVcDO5d1K9q+2dVIied021e7Y0RERIMN+DUoSWOAzYBbgNVtzyirHgNWL/NrAVNbXjatxHqLT2sTp5djdM9rgqTJkiY3ebDFiIihYkALlKTlgZ8Ah9t+tnVdOfPp13HiezuG7dNtj7M9buTIkf2ZRkREdGDACpSkYVTF6TzbPy3hx0vzHOXvEyU+HRjd8vJRJdZbfFSbeG/HiIiIBhuoXnwCzgTus/3NllWTgK6eeOOBy1ri+5XefFsBz5RmuiuBnSStUjpH7ARcWdY9K2mrcqz9uu2r3TEiIqLBBupG3XcC+wJ3S7qzxL4InAhcJOkA4BFgz7LuCmAXYArwd2B/ANuzJR0H3Fa2+6rt2WX+s8BZwLLAL8pEL8eIiIgGG5ACZft6QD2s3r7N9gYO7mFfE4GJbeKTgY3axGe1O0ZERDRbRpKIiIhGSoGKiIhGSoGKiIhGSoGKiIhGSoGKiIhGSoGKiIhGSoGKiIhGSoGKiIhGSoGKiIhGSoGKiIhGSoGKiIhGSoGKiIhGSoGKiIhGSoGKiIhGSoGKiIhGSoGKiIhGSoGKiIhGSoGKiIhGSoGKiIhGSoGKiIhGSoGKiIhGSoGKiIhGSoGKiIhGSoGKiIhG6qhASfp8D/F/69t0IiIiKp2eQX2ph/j/66tEIiIiWvVaoCRtJ2k7YElJ7+1aLtOngec6OYikiZKekPTHltiXJU2XdGeZdmlZd7SkKZLul/S+lvjOJTZF0lEt8bGSbinxH0tausSXKctTyvoxnX4wERFRr6Xms/7M8nc4MLElbuAx4NAOj3MW8F3gnG7xb9k+qTUgaQNgL2BDYE3gV5LWL6tPBXYEpgG3SZpk+17gP8u+LpT0feAA4LTy9ynbb5S0V9nuYx3mHBERNer1DMr2WNtjgfO65sv0Bttb257UyUFs/xaY3WFOuwIX2n7J9l+AKcAWZZpi+yHb/wAuBHaVJGA74JLy+rOB3Vr2dXaZvwTYvmwfEREN19E1KNv7dc1LWqJ1WsTjHyLprtIEuEqJrQVMbdlmWon1FF8NeNr2K93ic+2rrH+mbB8REQ3XaS++zSXdJOlvwMtleqX8XVinAesCmwIzgG8swr4WmaQJkiZLmjxz5sw6U4mICDrvxXc2cB0wDnhDmcaWvwvF9uO2X7X9GnAGVRMewHRgdMumo0qsp/gsYGVJS3WLz7Wvsn6lsn27fE63Pc72uJEjRy7s24qIiD7SaYFaBzjG9n22H2mdFvbAktZoWdwd6OrhNwnYq/TAGwusB9wK3AasV3rsLU3VkWKSbVMVzz3K68cDl7Xsa3yZ3wO4tmwfERENN79efF0uBXYCrlyYg0i6ANgWGCFpGnAssK2kTal6BD4MfAbA9j2SLgLupWpGPNj2q2U/h5QclgQm2r6nHOJI4EJJxwO/Z07vwzOBcyVNoeqksdfC5B8REQOv0wI1HLhU0vVU3cv/qbUDRU9s790mfGabWNf2JwAntIlfAVzRJv4Qc5oIW+MvAh+dX34REdE8nRaoe8sUERExIDoqULa/0t+JREREtOr4PiZJO0o6U9LlZXlcGQYpIiKiz3V6H9ShVPctPQC8u4RfAI7vp7wiImKI6/QM6nBgB9snAq+V2J+AN/VLVhERMeR1WqBWYM4wQ133EQ0D/tHnGUVERNB5gfotcFS32OeobpCNiIjoc512Mz8UuFzSgcAKku6nehbUB/sts4iIGNI67WY+Q9LbqW6GXZuque/WMo5eREREn+v0DIoyht0tZYqIiOhXPRYoSVOZ0yGiR7bX7tOMIiIi6P0Map+W+bdTjQp+CvAI1ejmhzDvI9wjIiL6RI8FyvZvuuYlnQq8z/b0ltgvgF9S84MGIyJi8dRpN/M1gee7xZ5nzqPVIyIi+lSnBWoSMKmMx/cWSTtRPSNqUv+lFhERQ1mnBeog4Cbg+8Ad5e8tJR4REdHnOr0P6kWq
kSS6jyYRERHRL3rrZv5u278t8z0+VsP2tf2RWEREDG29nUF9D9iozPf0eHYDb+jTjCIiIui9m/lGLfNjByadiIiISqcPLNxU0uhusdGSNumftCIiYqjrtBffj6ie/9RqaeDcvk0nIiKi0mmBWtv2Q60B2w8CY/o8o4iICDovUNMkbd4aKMuP9n1KERERnT9u41vAZZL+C3gQWBf4PHBCfyUWERFDW6c36p4h6WngAGA01QMLj7B9SX8mFxERQ9eCPLDwYuDifswlIiLinzrtZn6KpK27xbaWdHKHr58o6QlJf2yJrSrpakkPlL+rlLjK8aZIuqv12pek8WX7BySNb4m/TdLd5TWnSFJvx4iIiObrtJPE3sDkbrHbgY93+PqzgJ27xY4CrrG9HnANc8b5ez+wXpkmAKdBVWyAY4EtgS2AY1sKzmnAgS2v23k+x4iIiIbrtEC5zbZLdvr6Mqbf7G7hXYGzy/zZwG4t8XNcuRlYWdIawPuAq23Ptv0UcDWwc1m3ou2bbZvqKb+7zecYERHRcJ0WqN8Bx0taAqD8/UqJL6zVbc8o848Bq5f5tag6YXSZVmK9xae1ifd2jHlImiBpsqTJM2fOXIi3ExERfanTAnUYsAMwQ9KtwIyyfGhfJFHOfNwX+1rYY9g+3fY42+NGjhzZn6lEREQHOu1m3nWj7hZU3cwfp2ouu5XqcfAL43FJa9ieUZrpnijx6eUYXUaV2HRg227xX5f4qDbb93aMiIhouE7PoABWo+qg8EXgOmBzqjOrhTUJ6OqJNx64rCW+X+nNtxXwTGmmuxLYSdIqpXPETsCVZd2zkrYqvff267avdseIiIiG6/UMStIw4MPAJ6k6KUwBLgDWBva03dEZiaQLqM5+RkiaRtUb70TgIkkHAI8Ae5bNrwB2Kcf6O7A/gO3Zko4DbivbfdV2V8eLz1L1FFwW+EWZ6OUYERHRcPNr4nsceI3qy/9Y23cASPrsghzE9t49rNq+zbYGDu5hPxOBiW3ik5nzcMXW+Kx2x4iIiOabXxPfXcDKVE17b8+NrhERMVB6LVC2t6UaGPYqqsFhH5N0ObAc8z4fKiIios/Mt5OE7UdsH1dGY9ieqov5a8AfyujmERERfW5BevFh+3rbE4B/oboH6q39klVERAx5C1Sguth+0fYFtt/f1wlFRETAQhaoiIiI/pYCFRERjZQCFRERjZQCFRERjZQCFRERjZQCFRERjZQCFRERjZQCFRERjZQCFRERjZQCFRERjZQCFRERjZQCFRERjZQCFRERjZQCFRERjZQCFRERjZQCFRERjZQCFRERjZQCFRERjZQCFRERjZQCFRERjZQCFRERjVR7gZL0sKS7Jd0paXKJrSrpakkPlL+rlLgknSJpiqS7JG3esp/xZfsHJI1vib+t7H9Kea0G/l1GRMSCqr1AFe+1vantcWX5KOAa2+sB15RlgPcD65VpAnAaVAUNOBbYEtgCOLarqJVtDmx53c79/3YiImJRNaVAdbcrcHaZPxvYrSV+jis3AytLWgN4H3C17dm2nwKuBnYu61a0fbNtA+e07CsiIhqsCQXKwFWSbpc0ocRWtz2jzD8GrF7m1wKmtrx2Won1Fp/WJj4PSRMkTZY0eebMmYvyfiIiog8sVXcCwDa2p0t6PXC1pD+1rrRtSe7vJGyfDpwOMG7cuH4/XkRE9K72Myjb08vfJ4BLqa4hPV6a5yh/nyibTwdGt7x8VIn1Fh/VJh4REQ1Xa4GStJykFbrmgZ2APwKTgK6eeOOBy8r8JGC/0ptvK+CZ0hR4JbCTpFVK54idgCvLumclbVV67+3Xsq+IiGiwupv4VgcuLT2/lwLOt/1LSbcBF0k6AHgE2LNsfwWwCzAF+DuwP4Dt2ZKOA24r233V9uwy/1ngLGBZ4BdlioiIhqu1QNl+CNikTXwWsH2buIGDe9jXRGBim/hkYKNFTjYiIgZU7degIiIi2kmBioiIRkqBioiIRkqBioiIRkqB
ioiIRkqBioiIRkqBioiIRkqBioiIRkqBioiIRkqBioiIRkqBioiIRkqBioiIRkqBioiIRkqBioiIRkqBioiIRkqBioiIRkqBioiIRkqBioiIRkqBioiIRkqBioiIRkqBioiIRkqBioiIRkqBioiIRkqBioiIRkqBioiIRlqq7gQiAsYc9fO6U/inh0/8QN0pRAA5g4qIiIYaEgVK0s6S7pc0RdJRdecTERHzt9g38UlaEjgV2BGYBtwmaZLte+vNLCJiwQ2l5uChcAa1BTDF9kO2/wFcCOxac04RETEfsl13Dv1K0h7AzrY/XZb3Bba0fUi37SYAE8rim4D7BzTRno0Anqw7iQbK5zKvfCbt5XNpr0mfyzq2R3YPLvZNfJ2yfTpwet15dCdpsu1xdefRNPlc5pXPpL18Lu0Nhs9lKDTxTQdGtyyPKrGIiGiwoVCgbgPWkzRW0tLAXsCkmnOKiIj5WOyb+Gy/IukQ4EpgSWCi7XtqTmtBNK7ZsSHyucwrn0l7+Vzaa/znsth3koiIiMFpKDTxRUTEIJQCFRERjZQCFRERjbTYd5KIiKFH0tbAGFq+42yfU1tCDSFprO2/zC/WFOkk0VCSPgJsAxi43valNacUDSTpUOBHtp+qO5emkHQusC5wJ/BqCdv25+rLqhkk3WF7826x222/ra6cepMzqAaS9D3gjcAFJfQZSTvYPrjGtGol6TmqYt3qGWAycITthwY+q0ZYnWoA5DuAicCVzq/OccAG+RzmkPRmYENgpfLjt8uKwPB6spq/nEE1kKQ/AW/p+gcmaQngHttvqTez+kg6jmo0+vMBUd1wvS5wB/B/bG9bX3b1kiRgJ2B/qi/ni4AzbT9Ya2I1kXQx8DnbM+rOpSkk7QrsBnyYuQcqeA640PaNtSQ2HzmDaqYpwNrAI2V5dIkNZR+2vUnL8umS7rR9pKQv1pZVA9i2pMeAx4BXgFWASyRdbfvf682uFiOAeyXdCrzUFbT94fpSqpfty4DLJL3D9k1159OpFKhmWgG4r/wDA3g7MFnSJBiy/9D+LmlP4JKyvAfwYpkfss0Akg4D9qMalfoHwBdsv1zOuh8AhmKB+nLdCTSNpH+3/V/AxyXt3X19U6/PpUA105fqTqCBPgF8G/geVUG6GdhH0rLAIb29cDG3KvAR24+0Bm2/JumDNeVUK9u/kbQ61Q87gFttP1FnTg1wX/k7udYsFlCuQTWYpBWZu5vs7BrTiQaT9HpaLnbb/muN6dSqnGn/N/BrquuV76I6s7ykt9cNBZKG236xW2yE7aY8F2ouKVANVB6e+FWqJqzXqP6R2fYbak2sRpJGAgcy770tn6orpyaQ9CHgm8CawBPAOsB9tjesNbEaSfoDsGPXWVP5f+dX3a5hDkmS7gIm2L65LP8r8HXb69ebWXtp4mumLwAbNfVXTU0uA34H/Io597YEHA9sRfUFvJmk9wL71JxT3Zbo1qQ3i4ya0+UTwERJv6b6UbMasF2tGfUiBaqZHgT+XncSDfM620fWnUQDvWx7lqQlJC1h+zpJJ9edVM1+KelK5txH+DHgihrzaQzbd0s6ATiXqov5u21PqzmtHqVANdPRwI2SbmHubrKN7GkzQP5X0i6280Uzt6clLQ/8FjhP0hPA32rOqVa2v1Cart5ZQqdnJJaKpDOp7h/cGFif6t/Vd2yfWm9m7eUaVAOV7uXXA3dTXYMCwPbZtSVVszKSxHJUBftl5lyXW7HWxGoiaRXbT0laDniBqgnrE8BKwHm2Z9WaYDSSpMOBb7cMArAS8E3bB9SbWXspUA0k6fe2N6s7j2iucqb0JHADcCNwg+0/15tVvSRdb3ubNsNiDekfM1D1CLb9bA/r1m5qr88UqAaS9DXgYeBy5m7iG3LdzCW92fafJG3ebr3tOwY6p6aQtD6wdcs0kur+sBvKTZkRwNyDxEq6xvb27dY1TQpUA0lqN/T9kOxmLul02xMkXddmtW03tgfSQJK0LrALcBiw
lu1la06pNpLOtb3v/GJDSWurTPcWmia32KSTRAPZHlt3Dk1he0L5+966c2mS8ryjrYF3UI3V+BBldA2qAXSHsrnuAZO0FNDIx0kMIPcw3265MVKgGkjSfu3iQ/mBa5I+CvzS9nOS/h+wOXCc7d/XnFpdrqcqRN8CLrU95G9LkHQ08EVgWUld11sE/AM4vbbEmuH1kv6N6vPomqcsj6wvrd6lia+BJH2nZXE4sD1wh+09akqpdpLusr2xpG2obk79b+BLtresObVaSPoX5lx72oLqx+YdwE3ATUP4+VhI+rrto+vOo0kkHdvbettfGahcFkQK1CAgaWWqZ7bsXHcudelqJ5f0deBu2+c3ue18oEl6HfAp4HBgrO0la06pNpJ2B661/UxZXhnY1vbP6s2s+SQdbfvrdefRJQVqEJA0DPij7TfVnUtdJP0vMB3Ykap57wWqUaqH5Phq5f6VdzDnLGozqsdr3ETVi2/IDoxanhO2abdYfsx0oGk9+nINqoEkXc6cC5dLABtQPSV1KNsT2Bk4yfbTktagGrNwqJpCac6jGlj4Ntsv1JtSY7Qbdy/fdZ1R3Qm0yn+0ZjqpZf4V4JEmj5c1QNYAfm77JUnbUg3VMmQ7jdju6MJ2Gcbm0P7Op2EmS/om0DV8z8HA7TXmM5g0qkktI/w2kO3fdE3A/VRNW0PdT4BXJb2RqkfWaOD8elMaFN45/00WO4dS9dz7cZleoipSMX85g4r2JG0FnAjMBo6jGnF4BLCEpP1s/7LO/Gr2mu1XJH0E+I7t70gaql3Moxe2/wYcVXceg9TFdSfQKgWqWb5LdR/HSsC1wPtt3yzpzVSPDhjKBeplSXsD+wEfKrFhNeYTDVWGgPo88z7ccsiPOiJpFPAdYBuq5rzfAYd1XUKw/bUa05tHClSzLGX7KgBJX+166mUZi67ezOq3P3AQcILtv0gaS3WGGb0biv/jXAx8H/gBebhldz+kahr/aFnep8R2rC2jXqSbeYN0G9Bxru6eTev+WQdJS1M9wwbgftsv15lPk0h6XbvRJCR90vZZNaRUG0m32x7qQxu11UMX/HliTZFOEs2yiaRny+MCNi7zXctvrTu5OpWeew9Q9cz6HvBnSe+uNakGkLS1pHuBPwxQF5cAAAatSURBVJXlTSR9r2v9UCtOxeWSPitpDUmrdk11J9UQsyTtI2nJMu0DNPbZYTmDikFB0u3Ax23fX5bXBy4Y6r+Uy1OX9wAmtYxW/UfbG9WbWX3yNICeSVqH6hrUO6iuQd0IfK6pz4PKNagYLIZ1FScA238uI2wMebandrtGOaSvu+RpAD2z/Qjw4brz6FQKVAwWt0v6AfCjsvwJYHKN+TTF1PLoDZeCfRhwX8051SpPA5iXpC/1stq2jxuwZBZAmvhiUJC0DNXNltuU0O+A79l+qedXLf4kjQC+DexA1WPvKqpuw429rtDf8jSAeUk6ok14OeAAYDXbyw9wSh1JgYrGk7QkcI/tN9edS9NIGml7Zt15NFmeBjA3SStQnWkfQDXG5zdsP1FvVu2lF180nu1XgfslrV13Lg10g6SrJB1QvohjXn8Dhvx1qdKb8XjgLqrLO5vbPrKpxQlyDSoGj1WAeyTdSvWFA4DtQXPBtz/YXl/SFsBewDGly/mFtn80n5cutro9DWBJ8jQAJP038BGqcSzfavv5mlPqSJr4otHK4LCrM++PqXcBM2yfOfBZNVO5HvVN4BND/IGF72FOgep6GsCQHnBZ0mtUg+a+wtwjlouqk8SKtSQ2HzmDiqY7GTja9t2tQUmzga8BQ7pASVoR2J3qDGpd4FKqR8APOeWGdjPv8E6W9BLwIHCM7WsGPLma2R6Ul3NyBhWNJuk222/vYd3dtof6CBt/AX4GXGT7prrzaarS0WYj4LyhfBPzYJMzqGi63i78LztgWTTXG5xfmfNVOtr8oVsX9Gi4FKhousmSDrR9RmtQ0qcZwk9JlXSy7cOBSZLaXVMY0p1HemL7
f+rOITqXJr5oNEmrU11X+QdzCtI4YGlgd9uP1ZVbnSS9zfbtpUNAl65/zCpPY44Y1FKgYlCQ9F6qawhQ3bR7bZ351E3SrsAo26eW5VuBkVRF6kjbjXoyasTCSIGKGIQk3QDsZXtqWb6Takif5YAf2t6+zvwi+kKuQUUMTkt3Fafi+jL+3ixJy9WVVERfGpR94yOCVVoXbB/SsjhygHOJ6BcpUBGD0y2SDuwelPQZ4NYa8onoc7kGFTEISXo91Q26LwF3lPDbgGWA3Ww/XlduEX0lBSpiEJO0HbBhWRzyvRtj8ZICFRERjZRrUBER0UgpUBER0UgpUBGDkKQxkiwp9zLGYisFKqKPSXpY0guSnpf0mKSzJC1fQx73lByel/SqpBdblr840PlELKgUqIj+8SHbywObApsBRw90ArY3tL18yeN3wCFdy7a/NtD5RCyoFKiIflRGW7+SqlAh6cPlzOZpSb+W9JaubSUdJelBSc9JulfS7i3rlpR0kqQnJT0EfGBhc5L0R0kfalkeVva7WUvT4QRJj0qaIenzLdsu0ZLnLEkXSVp1YXOJ6E0KVEQ/kjQKeD8wRdL6wAXA4VTDEV0BXC5p6bL5g8C7gJWArwA/krRGWXcg8EGqs7FxwB6LkNY5wD4ty7sAM2z/viX2XmA9YCfgSEk7lPihwG7Ae4A1gaeAUxchl4gepUBF9I+fSXoOmAo8ARwLfAz4ue2rbb8MnET1VOCtAWxfbPtR26/Z/jHwALBF2d+ewMm2p9qeDXx9EXL7EbCLpBXL8r7Aud22+Yrtv9m+G/ghsHeJHwQcY3ua7ZeALwN7pLNG9IcUqIj+sZvtFYBtgTcDI6jOOB7p2sD2a1QFbC0ASftJurM0/z1N9fyrEWXzNcu2XR5hIdl+FLgB+FdJK1Od4Z3XbbPux1qzzK8DXNqS433Aq8DqC5tPRE/yqyeiH9n+jaSzqM6Wbgfe2rVOkoDRwHRJ6wBnUD3T6Sbbr5ZnPKlsPqNs22XtRUztbODTVN8BN9me3m39aOBPLcd6tMxPBT5l+4ZFPH7EfOUMKqL/nQzsCEwCPiBpe0nDgCOoBnu9kepBgwZmAkjanzlPEAa4CPicpFGSVgGOWsScfgZsDhxGdU2qu/+Q9DpJGwL7Az8u8e8DJ5SCiqSR5em+EX0uBSqin9meSVUEvkTVOeE7wJPAh6i6o//D9r3AN4CbgMepzrRaz1LOoOoN+Aeq0ct/uog5vQD8BBjbw75+A0wBrgFOsn1ViX+bqtBeVa6x3QxsuSi5RPQkg8VGDFGSvgSsb3ufltgY4C/AMNuv1JRaBJBrUBFDUrl36QCqHnwRjZQmvohBrmX4ou7Tu3rY/kCqzg6/sP3bgc02onNp4ouIiEbKGVRERDRSClRERDRSClRERDRSClRERDRSClRERDTS/wcRAu+QlJ7xmAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "#df1.loc[True].plot.bar()\n",
    "ax = df1.loc[True].plot.bar()\n",
    "ax.set_xlabel(\"Road_Type\", fontsize=12)\n",
    "ax.set_ylabel(\"Accident\", fontsize=12)\n",
    "ax.set_title(\"Road_type vs Accident\")\n",
    "ax.legend('count',loc='best')\n",
    "rcParams.update({'figure.autolayout': True})\n",
    "plt.rc('xtick', labelsize=12) \n",
    "plt.rc('ytick', labelsize=12)\n",
    "plt.tight_layout()\n",
    "plt.savefig('/Users/pprusty05/google_drive/Data_Mining/Project/plots/Road_vs_Accident.pdf')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# missing values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "#missing data in the dataset\n",
    "def find_missing_values(df):\n",
    "    missing_value_dict = {}\n",
    "    for column_name in df.columns:\n",
    "        missing_value_count = df.where(df[column_name].isNull()).count()\n",
    "        missing_value_percent = (missing_value_count / df.count()) * 100\n",
    "        missing_value_dict[column_name] = {}\n",
    "        missing_value_dict[column_name]['count'] =  missing_value_count\n",
    "        missing_value_dict[column_name]['percent'] =  missing_value_percent\n",
    "\n",
    "    missing_value_pd = pd.DataFrame(missing_value_dict).T\n",
    "    return missing_value_pd\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "missing_value_pd=find_missing_values(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>percent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>ID</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Source</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TMC</td>\n",
       "      <td>728071.0</td>\n",
       "      <td>24.478446</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Severity</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Start_Time</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>End_Time</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Start_Lat</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Start_Lng</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>End_Lat</td>\n",
       "      <td>2246264.0</td>\n",
       "      <td>75.521554</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>End_Lng</td>\n",
       "      <td>2246264.0</td>\n",
       "      <td>75.521554</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Distance</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Description</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.000034</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Number</td>\n",
       "      <td>1917605.0</td>\n",
       "      <td>64.471722</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Street</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Side</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>City</td>\n",
       "      <td>83.0</td>\n",
       "      <td>0.002791</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>County</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>State</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Zipcode</td>\n",
       "      <td>880.0</td>\n",
       "      <td>0.029586</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Country</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Timezone</td>\n",
       "      <td>3163.0</td>\n",
       "      <td>0.106343</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Airport_Code</td>\n",
       "      <td>5691.0</td>\n",
       "      <td>0.191337</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Weather_Timestamp</td>\n",
       "      <td>36705.0</td>\n",
       "      <td>1.234057</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Temperature</td>\n",
       "      <td>56063.0</td>\n",
       "      <td>1.884892</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Wind_Chill</td>\n",
       "      <td>1852623.0</td>\n",
       "      <td>62.286965</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Humidity</td>\n",
       "      <td>59173.0</td>\n",
       "      <td>1.989453</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Pressure</td>\n",
       "      <td>48142.0</td>\n",
       "      <td>1.618580</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Visibility</td>\n",
       "      <td>65691.0</td>\n",
       "      <td>2.208595</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Wind_Direction</td>\n",
       "      <td>45101.0</td>\n",
       "      <td>1.516339</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Wind_Speed</td>\n",
       "      <td>440840.0</td>\n",
       "      <td>14.821464</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Precipitation</td>\n",
       "      <td>1998358.0</td>\n",
       "      <td>67.186716</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Weather_Condition</td>\n",
       "      <td>65932.0</td>\n",
       "      <td>2.216697</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Amenity</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Bump</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Crossing</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Give_Way</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Junction</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>No_Exit</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Railway</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Roundabout</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Station</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Stop</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Traffic_Calming</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Traffic_Signal</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Turning_Loop</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Sunrise_Sunset</td>\n",
       "      <td>93.0</td>\n",
       "      <td>0.003127</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Civil_Twilight</td>\n",
       "      <td>93.0</td>\n",
       "      <td>0.003127</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Nautical_Twilight</td>\n",
       "      <td>93.0</td>\n",
       "      <td>0.003127</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Astronomical_Twilight</td>\n",
       "      <td>93.0</td>\n",
       "      <td>0.003127</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                           count    percent\n",
       "ID                           0.0   0.000000\n",
       "Source                       0.0   0.000000\n",
       "TMC                     728071.0  24.478446\n",
       "Severity                     0.0   0.000000\n",
       "Start_Time                   0.0   0.000000\n",
       "End_Time                     0.0   0.000000\n",
       "Start_Lat                    0.0   0.000000\n",
       "Start_Lng                    0.0   0.000000\n",
       "End_Lat                2246264.0  75.521554\n",
       "End_Lng                2246264.0  75.521554\n",
       "Distance                     0.0   0.000000\n",
       "Description                  1.0   0.000034\n",
       "Number                 1917605.0  64.471722\n",
       "Street                       0.0   0.000000\n",
       "Side                         0.0   0.000000\n",
       "City                        83.0   0.002791\n",
       "County                       0.0   0.000000\n",
       "State                        0.0   0.000000\n",
       "Zipcode                    880.0   0.029586\n",
       "Country                      0.0   0.000000\n",
       "Timezone                  3163.0   0.106343\n",
       "Airport_Code              5691.0   0.191337\n",
       "Weather_Timestamp        36705.0   1.234057\n",
       "Temperature              56063.0   1.884892\n",
       "Wind_Chill             1852623.0  62.286965\n",
       "Humidity                 59173.0   1.989453\n",
       "Pressure                 48142.0   1.618580\n",
       "Visibility               65691.0   2.208595\n",
       "Wind_Direction           45101.0   1.516339\n",
       "Wind_Speed              440840.0  14.821464\n",
       "Precipitation          1998358.0  67.186716\n",
       "Weather_Condition        65932.0   2.216697\n",
       "Amenity                      0.0   0.000000\n",
       "Bump                         0.0   0.000000\n",
       "Crossing                     0.0   0.000000\n",
       "Give_Way                     0.0   0.000000\n",
       "Junction                     0.0   0.000000\n",
       "No_Exit                      0.0   0.000000\n",
       "Railway                      0.0   0.000000\n",
       "Roundabout                   0.0   0.000000\n",
       "Station                      0.0   0.000000\n",
       "Stop                         0.0   0.000000\n",
       "Traffic_Calming              0.0   0.000000\n",
       "Traffic_Signal               0.0   0.000000\n",
       "Turning_Loop                 0.0   0.000000\n",
       "Sunrise_Sunset              93.0   0.003127\n",
       "Civil_Twilight              93.0   0.003127\n",
       "Nautical_Twilight           93.0   0.003127\n",
       "Astronomical_Twilight       93.0   0.003127"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "missing_value_pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "11817022.0"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "missing_value_pd['count'].sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# drop columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "#dropping some columns\n",
    "df=df.drop('ID')\n",
    "df=df.drop('Description')\n",
    "df=df.drop('Zipcode')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "#find the datatype of each column\n",
    "def get_data_types(df):\n",
    "    col_data_type = {}\n",
    "    for types in df.dtypes:\n",
    "        col_data_type[types[0]] = types[1]\n",
    "    return col_data_type"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Source': 'string',\n",
       " 'TMC': 'double',\n",
       " 'Severity': 'int',\n",
       " 'Start_Time': 'timestamp',\n",
       " 'End_Time': 'timestamp',\n",
       " 'Start_Lat': 'double',\n",
       " 'Start_Lng': 'double',\n",
       " 'End_Lat': 'double',\n",
       " 'End_Lng': 'double',\n",
       " 'Distance': 'double',\n",
       " 'Number': 'double',\n",
       " 'Street': 'string',\n",
       " 'Side': 'string',\n",
       " 'City': 'string',\n",
       " 'County': 'string',\n",
       " 'State': 'string',\n",
       " 'Country': 'string',\n",
       " 'Timezone': 'string',\n",
       " 'Airport_Code': 'string',\n",
       " 'Weather_Timestamp': 'timestamp',\n",
       " 'Temperature': 'double',\n",
       " 'Wind_Chill': 'double',\n",
       " 'Humidity': 'double',\n",
       " 'Pressure': 'double',\n",
       " 'Visibility': 'double',\n",
       " 'Wind_Direction': 'string',\n",
       " 'Wind_Speed': 'double',\n",
       " 'Precipitation': 'double',\n",
       " 'Weather_Condition': 'string',\n",
       " 'Amenity': 'boolean',\n",
       " 'Bump': 'boolean',\n",
       " 'Crossing': 'boolean',\n",
       " 'Give_Way': 'boolean',\n",
       " 'Junction': 'boolean',\n",
       " 'No_Exit': 'boolean',\n",
       " 'Railway': 'boolean',\n",
       " 'Roundabout': 'boolean',\n",
       " 'Station': 'boolean',\n",
       " 'Stop': 'boolean',\n",
       " 'Traffic_Calming': 'boolean',\n",
       " 'Traffic_Signal': 'boolean',\n",
       " 'Turning_Loop': 'boolean',\n",
       " 'Sunrise_Sunset': 'string',\n",
       " 'Civil_Twilight': 'string',\n",
       " 'Nautical_Twilight': 'string',\n",
       " 'Astronomical_Twilight': 'string'}"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "col_data_type = get_data_types(df)\n",
    "col_data_type"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Handle Categorical Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.ml.feature import StringIndexer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Starting :  Source\n",
      "Done : Source\n",
      "Starting :  Street\n",
      "Done : Street\n",
      "Starting :  Side\n",
      "Done : Side\n",
      "Starting :  City\n",
      "Done : City\n",
      "Starting :  County\n",
      "Done : County\n",
      "Starting :  State\n",
      "Done : State\n",
      "Starting :  Country\n",
      "Done : Country\n",
      "Starting :  Timezone\n",
      "Done : Timezone\n",
      "Starting :  Airport_Code\n",
      "Done : Airport_Code\n",
      "Starting :  Wind_Direction\n",
      "Done : Wind_Direction\n",
      "Starting :  Weather_Condition\n",
      "Done : Weather_Condition\n",
      "Starting :  Sunrise_Sunset\n",
      "Done : Sunrise_Sunset\n",
      "Starting :  Civil_Twilight\n",
      "Done : Civil_Twilight\n",
      "Starting :  Nautical_Twilight\n",
      "Done : Nautical_Twilight\n",
      "Starting :  Astronomical_Twilight\n",
      "Done : Astronomical_Twilight\n"
     ]
    }
   ],
   "source": [
    "for keys in col_data_type:\n",
    "    if col_data_type[keys] == 'string':\n",
    "        print('Starting :  ' + keys)\n",
    "        indexer = StringIndexer(inputCol=keys, outputCol=keys+'_index').fit(df)\n",
    "        df = indexer.setHandleInvalid(\"keep\").transform(df)\n",
    "        print('Done : ' + keys)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Starting :  Amenity\n",
      "Done :  Amenity\n",
      "Starting :  Bump\n",
      "Done :  Bump\n",
      "Starting :  Crossing\n",
      "Done :  Crossing\n",
      "Starting :  Give_Way\n",
      "Done :  Give_Way\n",
      "Starting :  Junction\n",
      "Done :  Junction\n",
      "Starting :  No_Exit\n",
      "Done :  No_Exit\n",
      "Starting :  Railway\n",
      "Done :  Railway\n",
      "Starting :  Roundabout\n",
      "Done :  Roundabout\n",
      "Starting :  Station\n",
      "Done :  Station\n",
      "Starting :  Stop\n",
      "Done :  Stop\n",
      "Starting :  Traffic_Calming\n",
      "Done :  Traffic_Calming\n",
      "Starting :  Traffic_Signal\n",
      "Done :  Traffic_Signal\n",
      "Starting :  Turning_Loop\n",
      "Done :  Turning_Loop\n"
     ]
    }
   ],
   "source": [
    "##Convert all boolean\n",
    "for keys in col_data_type:\n",
    "    if col_data_type[keys] == 'boolean':\n",
    "        print('Starting :  ' + keys)\n",
    "        df = df.withColumn(keys,df[keys].cast('int'))\n",
    "        print('Done :  ' + keys)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "col_data_type = get_data_types(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Source': 'string',\n",
       " 'TMC': 'double',\n",
       " 'Severity': 'int',\n",
       " 'Start_Time': 'timestamp',\n",
       " 'End_Time': 'timestamp',\n",
       " 'Start_Lat': 'double',\n",
       " 'Start_Lng': 'double',\n",
       " 'End_Lat': 'double',\n",
       " 'End_Lng': 'double',\n",
       " 'Distance': 'double',\n",
       " 'Number': 'double',\n",
       " 'Street': 'string',\n",
       " 'Side': 'string',\n",
       " 'City': 'string',\n",
       " 'County': 'string',\n",
       " 'State': 'string',\n",
       " 'Country': 'string',\n",
       " 'Timezone': 'string',\n",
       " 'Airport_Code': 'string',\n",
       " 'Weather_Timestamp': 'timestamp',\n",
       " 'Temperature': 'double',\n",
       " 'Wind_Chill': 'double',\n",
       " 'Humidity': 'double',\n",
       " 'Pressure': 'double',\n",
       " 'Visibility': 'double',\n",
       " 'Wind_Direction': 'string',\n",
       " 'Wind_Speed': 'double',\n",
       " 'Precipitation': 'double',\n",
       " 'Weather_Condition': 'string',\n",
       " 'Amenity': 'int',\n",
       " 'Bump': 'int',\n",
       " 'Crossing': 'int',\n",
       " 'Give_Way': 'int',\n",
       " 'Junction': 'int',\n",
       " 'No_Exit': 'int',\n",
       " 'Railway': 'int',\n",
       " 'Roundabout': 'int',\n",
       " 'Station': 'int',\n",
       " 'Stop': 'int',\n",
       " 'Traffic_Calming': 'int',\n",
       " 'Traffic_Signal': 'int',\n",
       " 'Turning_Loop': 'int',\n",
       " 'Sunrise_Sunset': 'string',\n",
       " 'Civil_Twilight': 'string',\n",
       " 'Nautical_Twilight': 'string',\n",
       " 'Astronomical_Twilight': 'string',\n",
       " 'Source_index': 'double',\n",
       " 'Street_index': 'double',\n",
       " 'Side_index': 'double',\n",
       " 'City_index': 'double',\n",
       " 'County_index': 'double',\n",
       " 'State_index': 'double',\n",
       " 'Country_index': 'double',\n",
       " 'Timezone_index': 'double',\n",
       " 'Airport_Code_index': 'double',\n",
       " 'Wind_Direction_index': 'double',\n",
       " 'Weather_Condition_index': 'double',\n",
       " 'Sunrise_Sunset_index': 'double',\n",
       " 'Civil_Twilight_index': 'double',\n",
       " 'Nautical_Twilight_index': 'double',\n",
       " 'Astronomical_Twilight_index': 'double'}"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "col_data_type"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "##remove all string data types as they have been converted already\n",
    "for key in col_data_type:\n",
    "    if col_data_type[key] == 'string':\n",
    "        df = df.drop(key)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "##Rename all _index to match \n",
    "for key in col_data_type:\n",
    "    if '_index' in key:\n",
    "        newKey = key.replace('_index', '')\n",
    "        df = df.withColumnRenamed(key, newKey)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Starting :  Start_Time\n",
      "Done :  Start_Time\n",
      "Starting :  End_Time\n",
      "Done :  End_Time\n",
      "Starting :  Weather_Timestamp\n",
      "Done :  Weather_Timestamp\n"
     ]
    }
   ],
   "source": [
    "#Convert timestamp to epoch\n",
    "for keys in col_data_type:\n",
    "    if col_data_type[keys] == 'timestamp':\n",
    "        print('Starting :  ' + keys)\n",
    "        df = df.withColumn(keys,df[keys].cast('double'))\n",
    "        print('Done :  ' + keys)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'TMC': 'double',\n",
       " 'Severity': 'int',\n",
       " 'Start_Time': 'double',\n",
       " 'End_Time': 'double',\n",
       " 'Start_Lat': 'double',\n",
       " 'Start_Lng': 'double',\n",
       " 'End_Lat': 'double',\n",
       " 'End_Lng': 'double',\n",
       " 'Distance': 'double',\n",
       " 'Number': 'double',\n",
       " 'Weather_Timestamp': 'double',\n",
       " 'Temperature': 'double',\n",
       " 'Wind_Chill': 'double',\n",
       " 'Humidity': 'double',\n",
       " 'Pressure': 'double',\n",
       " 'Visibility': 'double',\n",
       " 'Wind_Speed': 'double',\n",
       " 'Precipitation': 'double',\n",
       " 'Amenity': 'int',\n",
       " 'Bump': 'int',\n",
       " 'Crossing': 'int',\n",
       " 'Give_Way': 'int',\n",
       " 'Junction': 'int',\n",
       " 'No_Exit': 'int',\n",
       " 'Railway': 'int',\n",
       " 'Roundabout': 'int',\n",
       " 'Station': 'int',\n",
       " 'Stop': 'int',\n",
       " 'Traffic_Calming': 'int',\n",
       " 'Traffic_Signal': 'int',\n",
       " 'Turning_Loop': 'int',\n",
       " 'Source': 'double',\n",
       " 'Street': 'double',\n",
       " 'Side': 'double',\n",
       " 'City': 'double',\n",
       " 'County': 'double',\n",
       " 'State': 'double',\n",
       " 'Country': 'double',\n",
       " 'Timezone': 'double',\n",
       " 'Airport_Code': 'double',\n",
       " 'Wind_Direction': 'double',\n",
       " 'Weather_Condition': 'double',\n",
       " 'Sunrise_Sunset': 'double',\n",
       " 'Civil_Twilight': 'double',\n",
       " 'Nautical_Twilight': 'double',\n",
       " 'Astronomical_Twilight': 'double'}"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "col_data_type = get_data_types(df)\n",
    "col_data_type"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Starting :  Severity\n",
      "Done :  Severity\n",
      "Starting :  Amenity\n",
      "Done :  Amenity\n",
      "Starting :  Bump\n",
      "Done :  Bump\n",
      "Starting :  Crossing\n",
      "Done :  Crossing\n",
      "Starting :  Give_Way\n",
      "Done :  Give_Way\n",
      "Starting :  Junction\n",
      "Done :  Junction\n",
      "Starting :  No_Exit\n",
      "Done :  No_Exit\n",
      "Starting :  Railway\n",
      "Done :  Railway\n",
      "Starting :  Roundabout\n",
      "Done :  Roundabout\n",
      "Starting :  Station\n",
      "Done :  Station\n",
      "Starting :  Stop\n",
      "Done :  Stop\n",
      "Starting :  Traffic_Calming\n",
      "Done :  Traffic_Calming\n",
      "Starting :  Traffic_Signal\n",
      "Done :  Traffic_Signal\n",
      "Starting :  Turning_Loop\n",
      "Done :  Turning_Loop\n"
     ]
    }
   ],
   "source": [
    "##Convert all int to doubles \n",
    "for keys in col_data_type:\n",
    "    if col_data_type[keys] == 'int':\n",
    "        print('Starting :  ' + keys)\n",
    "        df = df.withColumn(keys,df[keys].cast('double'))\n",
    "        print('Done :  ' + keys)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'TMC': 'double',\n",
       " 'Severity': 'double',\n",
       " 'Start_Time': 'double',\n",
       " 'End_Time': 'double',\n",
       " 'Start_Lat': 'double',\n",
       " 'Start_Lng': 'double',\n",
       " 'End_Lat': 'double',\n",
       " 'End_Lng': 'double',\n",
       " 'Distance': 'double',\n",
       " 'Number': 'double',\n",
       " 'Weather_Timestamp': 'double',\n",
       " 'Temperature': 'double',\n",
       " 'Wind_Chill': 'double',\n",
       " 'Humidity': 'double',\n",
       " 'Pressure': 'double',\n",
       " 'Visibility': 'double',\n",
       " 'Wind_Speed': 'double',\n",
       " 'Precipitation': 'double',\n",
       " 'Amenity': 'double',\n",
       " 'Bump': 'double',\n",
       " 'Crossing': 'double',\n",
       " 'Give_Way': 'double',\n",
       " 'Junction': 'double',\n",
       " 'No_Exit': 'double',\n",
       " 'Railway': 'double',\n",
       " 'Roundabout': 'double',\n",
       " 'Station': 'double',\n",
       " 'Stop': 'double',\n",
       " 'Traffic_Calming': 'double',\n",
       " 'Traffic_Signal': 'double',\n",
       " 'Turning_Loop': 'double',\n",
       " 'Source': 'double',\n",
       " 'Street': 'double',\n",
       " 'Side': 'double',\n",
       " 'City': 'double',\n",
       " 'County': 'double',\n",
       " 'State': 'double',\n",
       " 'Country': 'double',\n",
       " 'Timezone': 'double',\n",
       " 'Airport_Code': 'double',\n",
       " 'Wind_Direction': 'double',\n",
       " 'Weather_Condition': 'double',\n",
       " 'Sunrise_Sunset': 'double',\n",
       " 'Civil_Twilight': 'double',\n",
       " 'Nautical_Twilight': 'double',\n",
       " 'Astronomical_Twilight': 'double'}"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "##Now lets look at the final data types\n",
    "col_data_type = get_data_types(df)\n",
    "col_data_type\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "##Write another parquet to save it\n",
    "#df.write.parquet(\"/Users/pprusty05/workspace/Data_mining/Project/Accident_numeric.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_numeric = spark.read.parquet(\"/Users/pprusty05/workspace/Data_mining/Project/data_folder/accident_data/Accident_numeric.parquet\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Missing Values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "mean_dict = { col: 'mean' for col in df_numeric.columns }\n",
    "col_avgs = df_numeric.agg( mean_dict ).collect()[0].asDict()\n",
    "col_avgs = { k[4:-1]: v for k,v in col_avgs.items() }\n",
    "df_numeric = df_numeric.fillna( col_avgs )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>percent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>TMC</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Severity</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Start_Time</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>End_Time</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Start_Lat</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Start_Lng</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>End_Lat</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>End_Lng</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Distance</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Number</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Weather_Timestamp</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Temperature</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Wind_Chill</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Humidity</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Pressure</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Visibility</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Wind_Speed</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Precipitation</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Amenity</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Bump</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Crossing</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Give_Way</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Junction</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>No_Exit</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Railway</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Roundabout</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Station</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Stop</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Traffic_Calming</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Traffic_Signal</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Turning_Loop</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Source</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Street</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Side</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>City</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>County</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>State</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Country</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Timezone</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Airport_Code</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Wind_Direction</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Weather_Condition</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Sunrise_Sunset</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Civil_Twilight</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Nautical_Twilight</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Astronomical_Twilight</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       count  percent\n",
       "TMC                      0.0      0.0\n",
       "Severity                 0.0      0.0\n",
       "Start_Time               0.0      0.0\n",
       "End_Time                 0.0      0.0\n",
       "Start_Lat                0.0      0.0\n",
       "Start_Lng                0.0      0.0\n",
       "End_Lat                  0.0      0.0\n",
       "End_Lng                  0.0      0.0\n",
       "Distance                 0.0      0.0\n",
       "Number                   0.0      0.0\n",
       "Weather_Timestamp        0.0      0.0\n",
       "Temperature              0.0      0.0\n",
       "Wind_Chill               0.0      0.0\n",
       "Humidity                 0.0      0.0\n",
       "Pressure                 0.0      0.0\n",
       "Visibility               0.0      0.0\n",
       "Wind_Speed               0.0      0.0\n",
       "Precipitation            0.0      0.0\n",
       "Amenity                  0.0      0.0\n",
       "Bump                     0.0      0.0\n",
       "Crossing                 0.0      0.0\n",
       "Give_Way                 0.0      0.0\n",
       "Junction                 0.0      0.0\n",
       "No_Exit                  0.0      0.0\n",
       "Railway                  0.0      0.0\n",
       "Roundabout               0.0      0.0\n",
       "Station                  0.0      0.0\n",
       "Stop                     0.0      0.0\n",
       "Traffic_Calming          0.0      0.0\n",
       "Traffic_Signal           0.0      0.0\n",
       "Turning_Loop             0.0      0.0\n",
       "Source                   0.0      0.0\n",
       "Street                   0.0      0.0\n",
       "Side                     0.0      0.0\n",
       "City                     0.0      0.0\n",
       "County                   0.0      0.0\n",
       "State                    0.0      0.0\n",
       "Country                  0.0      0.0\n",
       "Timezone                 0.0      0.0\n",
       "Airport_Code             0.0      0.0\n",
       "Wind_Direction           0.0      0.0\n",
       "Weather_Condition        0.0      0.0\n",
       "Sunrise_Sunset           0.0      0.0\n",
       "Civil_Twilight           0.0      0.0\n",
       "Nautical_Twilight        0.0      0.0\n",
       "Astronomical_Twilight    0.0      0.0"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "##Check how many missing values are there\n",
    "missing_value_df = find_missing_values(df_numeric)\n",
    "missing_value_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Verified above: no missing values remain.\n",
    "## Write the imputed frame to a new parquet file to save it.\n",
    "#df_numeric.write.parquet(\"/Users/pprusty05/workspace/Data_mining/Project/Accident_imputed.parquet\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Normalization Step"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_imputed = spark.read.parquet(\"/Users/pprusty05/workspace/Data_mining/Project/data_folder/accident_data/Accident_imputed.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "root\n",
      " |-- TMC: double (nullable = true)\n",
      " |-- Severity: double (nullable = true)\n",
      " |-- Start_Time: double (nullable = true)\n",
      " |-- End_Time: double (nullable = true)\n",
      " |-- Start_Lat: double (nullable = true)\n",
      " |-- Start_Lng: double (nullable = true)\n",
      " |-- End_Lat: double (nullable = true)\n",
      " |-- End_Lng: double (nullable = true)\n",
      " |-- Distance: double (nullable = true)\n",
      " |-- Number: double (nullable = true)\n",
      " |-- Weather_Timestamp: double (nullable = true)\n",
      " |-- Temperature: double (nullable = true)\n",
      " |-- Wind_Chill: double (nullable = true)\n",
      " |-- Humidity: double (nullable = true)\n",
      " |-- Pressure: double (nullable = true)\n",
      " |-- Visibility: double (nullable = true)\n",
      " |-- Wind_Speed: double (nullable = true)\n",
      " |-- Precipitation: double (nullable = true)\n",
      " |-- Amenity: double (nullable = true)\n",
      " |-- Bump: double (nullable = true)\n",
      " |-- Crossing: double (nullable = true)\n",
      " |-- Give_Way: double (nullable = true)\n",
      " |-- Junction: double (nullable = true)\n",
      " |-- No_Exit: double (nullable = true)\n",
      " |-- Railway: double (nullable = true)\n",
      " |-- Roundabout: double (nullable = true)\n",
      " |-- Station: double (nullable = true)\n",
      " |-- Stop: double (nullable = true)\n",
      " |-- Traffic_Calming: double (nullable = true)\n",
      " |-- Traffic_Signal: double (nullable = true)\n",
      " |-- Turning_Loop: double (nullable = true)\n",
      " |-- Source: double (nullable = true)\n",
      " |-- Street: double (nullable = true)\n",
      " |-- Side: double (nullable = true)\n",
      " |-- City: double (nullable = true)\n",
      " |-- County: double (nullable = true)\n",
      " |-- State: double (nullable = true)\n",
      " |-- Country: double (nullable = true)\n",
      " |-- Timezone: double (nullable = true)\n",
      " |-- Airport_Code: double (nullable = true)\n",
      " |-- Wind_Direction: double (nullable = true)\n",
      " |-- Weather_Condition: double (nullable = true)\n",
      " |-- Sunrise_Sunset: double (nullable = true)\n",
      " |-- Civil_Twilight: double (nullable = true)\n",
      " |-- Nautical_Twilight: double (nullable = true)\n",
      " |-- Astronomical_Twilight: double (nullable = true)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df_imputed.printSchema()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------+-------+\n",
      "|Severity|  count|\n",
      "+--------+-------+\n",
      "|     1.0|    968|\n",
      "|     4.0|  92337|\n",
      "|     3.0| 887620|\n",
      "|     2.0|1993410|\n",
      "+--------+-------+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df_imputed.groupBy(\"Severity\").count().show()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Convert to vector and normalize"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.mllib.linalg import Vectors\n",
    "from pyspark.ml.feature import VectorAssembler\n",
    "\n",
    "# get_data_types is defined earlier in the notebook; iterating its result presumably\n",
    "# yields the column names of df_imputed - TODO confirm.\n",
    "col_data_type = get_data_types(df_imputed)\n",
    "\n",
    "# Use every column as a feature except Severity, which is the label to predict.\n",
    "feature_list = list(col_data_type)\n",
    "feature_list.remove('Severity')\n",
    "# Keep the original (misspelled) name as an alias in case other cells refer to it.\n",
    "feaure_list = feature_list\n",
    "\n",
    "# Pack the selected columns into a single vector column for the ML estimators.\n",
    "assembler = VectorAssembler(inputCols=feature_list, outputCol=\"features_vector\")\n",
    "df_imputed = assembler.transform(df_imputed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.mllib.util import MLUtils\n",
    "from pyspark.ml.feature import StandardScaler\n",
    "\n",
    "# Scale each feature to unit standard deviation; withMean=False means no centering.\n",
    "# NOTE(review): the scaler is fitted on the whole frame; if the train/test split happens\n",
    "# afterwards, test rows contribute to the scaling statistics - confirm this is acceptable.\n",
    "scaler = StandardScaler(\n",
    "    withMean=False,\n",
    "    withStd=True,\n",
    "    inputCol=\"features_vector\",\n",
    "    outputCol=\"scaled_features_vector\",\n",
    ")\n",
    "\n",
    "# Fitting computes the summary statistics the scaler needs.\n",
    "scalerModel = scaler.fit(df_imputed)\n",
    "\n",
    "# Append the scaled vector as a new column next to the raw feature vector.\n",
    "df_imputed = scalerModel.transform(df_imputed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Save the normalized data frame.\n",
    "#df_imputed.write.parquet(\"/Users/pprusty05/workspace/Data_mining/Project/Accident_normalized.parquet\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train models on the normalized data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_normalized = spark.read.parquet(\"/Users/pprusty05/workspace/Data_mining/Project/data_folder/accident_data/Accident_normalized.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "root\n",
      " |-- TMC: double (nullable = true)\n",
      " |-- Severity: double (nullable = true)\n",
      " |-- Start_Time: double (nullable = true)\n",
      " |-- End_Time: double (nullable = true)\n",
      " |-- Start_Lat: double (nullable = true)\n",
      " |-- Start_Lng: double (nullable = true)\n",
      " |-- End_Lat: double (nullable = true)\n",
      " |-- End_Lng: double (nullable = true)\n",
      " |-- Distance: double (nullable = true)\n",
      " |-- Number: double (nullable = true)\n",
      " |-- Weather_Timestamp: double (nullable = true)\n",
      " |-- Temperature: double (nullable = true)\n",
      " |-- Wind_Chill: double (nullable = true)\n",
      " |-- Humidity: double (nullable = true)\n",
      " |-- Pressure: double (nullable = true)\n",
      " |-- Visibility: double (nullable = true)\n",
      " |-- Wind_Speed: double (nullable = true)\n",
      " |-- Precipitation: double (nullable = true)\n",
      " |-- Amenity: double (nullable = true)\n",
      " |-- Bump: double (nullable = true)\n",
      " |-- Crossing: double (nullable = true)\n",
      " |-- Give_Way: double (nullable = true)\n",
      " |-- Junction: double (nullable = true)\n",
      " |-- No_Exit: double (nullable = true)\n",
      " |-- Railway: double (nullable = true)\n",
      " |-- Roundabout: double (nullable = true)\n",
      " |-- Station: double (nullable = true)\n",
      " |-- Stop: double (nullable = true)\n",
      " |-- Traffic_Calming: double (nullable = true)\n",
      " |-- Traffic_Signal: double (nullable = true)\n",
      " |-- Turning_Loop: double (nullable = true)\n",
      " |-- Source: double (nullable = true)\n",
      " |-- Street: double (nullable = true)\n",
      " |-- Side: double (nullable = true)\n",
      " |-- City: double (nullable = true)\n",
      " |-- County: double (nullable = true)\n",
      " |-- State: double (nullable = true)\n",
      " |-- Country: double (nullable = true)\n",
      " |-- Timezone: double (nullable = true)\n",
      " |-- Airport_Code: double (nullable = true)\n",
      " |-- Wind_Direction: double (nullable = true)\n",
      " |-- Weather_Condition: double (nullable = true)\n",
      " |-- Sunrise_Sunset: double (nullable = true)\n",
      " |-- Civil_Twilight: double (nullable = true)\n",
      " |-- Nautical_Twilight: double (nullable = true)\n",
      " |-- Astronomical_Twilight: double (nullable = true)\n",
      " |-- features_vector: vector (nullable = true)\n",
      " |-- scaled_features_vector: vector (nullable = true)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df_normalized.printSchema()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2974335"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_normalized.count()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Logistic Regression Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----------+--------+----------------------+\n",
      "|prediction|Severity|scaled_features_vector|\n",
      "+----------+--------+----------------------+\n",
      "|       2.0|     1.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     1.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       3.0|     1.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       3.0|     1.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       3.0|     2.0|  (45,[0,1,2,3,4,5,...|\n",
      "+----------+--------+----------------------+\n",
      "only showing top 5 rows\n",
      "\n",
      "Test Error = 0.259583 \n"
     ]
    }
   ],
   "source": [
    "from pyspark.ml.classification import LogisticRegression\n",
    "from pyspark.ml import Pipeline\n",
    "from pyspark.ml.feature import StringIndexer, VectorIndexer\n",
    "from pyspark.ml.evaluation import MulticlassClassificationEvaluator\n",
    "\n",
    "# 70/30 train/test split. The fixed seed keeps the split (and the metrics below)\n",
    "# repeatable between runs on the same input.\n",
    "(trainingData, testData) = df_normalized.randomSplit([0.7, 0.3], seed=42)\n",
    "\n",
    "# Logistic regression on the scaled feature vector, with Severity as the label.\n",
    "LR = LogisticRegression(labelCol=\"Severity\", featuresCol=\"scaled_features_vector\")\n",
    "\n",
    "# Single-stage pipeline: the classifier is the only stage (no indexers are chained).\n",
    "pipeline = Pipeline(stages=[LR])\n",
    "\n",
    "# Fit on the training split.\n",
    "model = pipeline.fit(trainingData)\n",
    "\n",
    "# Score the held-out test split.\n",
    "predictions = model.transform(testData)\n",
    "\n",
    "# Show a few example rows.\n",
    "predictions.select(\"prediction\", \"Severity\", \"scaled_features_vector\").show(5)\n",
    "\n",
    "# Accuracy on the test split; the test error is its complement.\n",
    "evaluator = MulticlassClassificationEvaluator(\n",
    "    labelCol=\"Severity\", predictionCol=\"prediction\", metricName=\"accuracy\")\n",
    "accuracy = evaluator.evaluate(predictions)\n",
    "print(\"Test Error = %g \" % (1.0 - accuracy))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7404166816079176"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.723280890732743"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# F1 score of the `predictions` frame currently in scope, measured against Severity.\n",
    "evaluator = MulticlassClassificationEvaluator(\n",
    "    metricName=\"f1\",\n",
    "    labelCol=\"Severity\",\n",
    "    predictionCol=\"prediction\",\n",
    ")\n",
    "f1 = evaluator.evaluate(predictions)\n",
    "f1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7262139886740335"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Weighted precision of the `predictions` frame currently in scope.\n",
    "evaluator = MulticlassClassificationEvaluator(\n",
    "    metricName=\"weightedPrecision\",\n",
    "    labelCol=\"Severity\",\n",
    "    predictionCol=\"prediction\",\n",
    ")\n",
    "Precision = evaluator.evaluate(predictions)\n",
    "Precision"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7404166816079176"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Weighted recall of the `predictions` frame currently in scope.\n",
    "# For single-label multiclass data this equals the overall accuracy.\n",
    "evaluator = MulticlassClassificationEvaluator(\n",
    "    metricName=\"weightedRecall\",\n",
    "    labelCol=\"Severity\",\n",
    "    predictionCol=\"prediction\",\n",
    ")\n",
    "Recall = evaluator.evaluate(predictions)\n",
    "Recall"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Decision tree classifier for classification of severity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----------+--------+----------------------+\n",
      "|prediction|Severity|scaled_features_vector|\n",
      "+----------+--------+----------------------+\n",
      "|       2.0|     1.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     1.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     1.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     1.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     1.0|  (45,[0,1,2,3,4,5,...|\n",
      "+----------+--------+----------------------+\n",
      "only showing top 5 rows\n",
      "\n",
      "Test Error = 0.232019 \n"
     ]
    }
   ],
   "source": [
    "from pyspark.ml import Pipeline\n",
    "from pyspark.ml.classification import DecisionTreeClassifier\n",
    "from pyspark.ml.feature import StringIndexer, VectorIndexer\n",
    "from pyspark.ml.evaluation import MulticlassClassificationEvaluator\n",
    "\n",
    "# 70/30 train/test split. The fixed seed keeps the split (and the metrics below)\n",
    "# repeatable between runs on the same input.\n",
    "(trainingData, testData) = df_normalized.randomSplit([0.7, 0.3], seed=42)\n",
    "\n",
    "# Decision tree on the scaled feature vector, with Severity as the label.\n",
    "dt = DecisionTreeClassifier(labelCol=\"Severity\", featuresCol=\"scaled_features_vector\")\n",
    "\n",
    "# Single-stage pipeline: the tree is the only stage (no indexers are chained).\n",
    "pipeline = Pipeline(stages=[dt])\n",
    "\n",
    "# Fit on the training split.\n",
    "model = pipeline.fit(trainingData)\n",
    "\n",
    "# Score the held-out test split.\n",
    "predictions = model.transform(testData)\n",
    "\n",
    "# Show a few example rows.\n",
    "predictions.select(\"prediction\", \"Severity\", \"scaled_features_vector\").show(5)\n",
    "\n",
    "# Accuracy on the test split; the test error is its complement.\n",
    "evaluator = MulticlassClassificationEvaluator(\n",
    "    labelCol=\"Severity\", predictionCol=\"prediction\", metricName=\"accuracy\")\n",
    "accuracy = evaluator.evaluate(predictions)\n",
    "print(\"Test Error = %g \" % (1.0 - accuracy))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7679813137508612"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Accuracy of the `predictions` frame currently in scope, shown as the cell result.\n",
    "evaluator = MulticlassClassificationEvaluator(\n",
    "    metricName=\"accuracy\",\n",
    "    labelCol=\"Severity\",\n",
    "    predictionCol=\"prediction\",\n",
    ")\n",
    "accuracy = evaluator.evaluate(predictions)\n",
    "accuracy"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Decision tree classifier with different max depths"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----------+--------+----------------------+\n",
      "|prediction|Severity|scaled_features_vector|\n",
      "+----------+--------+----------------------+\n",
      "|       2.0|     1.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     1.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     2.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     2.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     2.0|  (45,[0,1,2,3,4,5,...|\n",
      "+----------+--------+----------------------+\n",
      "only showing top 5 rows\n",
      "\n",
      "+----------+--------+----------------------+\n",
      "|prediction|Severity|scaled_features_vector|\n",
      "+----------+--------+----------------------+\n",
      "|       2.0|     1.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     1.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     2.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     2.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     2.0|  (45,[0,1,2,3,4,5,...|\n",
      "+----------+--------+----------------------+\n",
      "only showing top 5 rows\n",
      "\n",
      "+----------+--------+----------------------+\n",
      "|prediction|Severity|scaled_features_vector|\n",
      "+----------+--------+----------------------+\n",
      "|       2.0|     1.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       3.0|     1.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     2.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       3.0|     2.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       3.0|     2.0|  (45,[0,1,2,3,4,5,...|\n",
      "+----------+--------+----------------------+\n",
      "only showing top 5 rows\n",
      "\n",
      "+----------+--------+----------------------+\n",
      "|prediction|Severity|scaled_features_vector|\n",
      "+----------+--------+----------------------+\n",
      "|       2.0|     1.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       3.0|     1.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     2.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       3.0|     2.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     2.0|  (45,[0,1,2,3,4,5,...|\n",
      "+----------+--------+----------------------+\n",
      "only showing top 5 rows\n",
      "\n",
      "+----------+--------+----------------------+\n",
      "|prediction|Severity|scaled_features_vector|\n",
      "+----------+--------+----------------------+\n",
      "|       2.0|     1.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     1.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     2.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     2.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     2.0|  (45,[0,1,2,3,4,5,...|\n",
      "+----------+--------+----------------------+\n",
      "only showing top 5 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from pyspark.ml import Pipeline\n",
    "from pyspark.ml.classification import DecisionTreeClassifier\n",
    "from pyspark.ml.feature import StringIndexer, VectorIndexer\n",
    "from pyspark.ml.evaluation import MulticlassClassificationEvaluator\n",
    "\n",
    "# One seeded 70/30 split shared by every depth, so the scores are comparable and repeatable.\n",
    "(trainingData, testData) = df_normalized.randomSplit([0.7, 0.3], seed=42)\n",
    "\n",
    "# Tree depths to compare; score_list collects the test accuracy for each, in the same order.\n",
    "max_depth = [2, 5, 10, 15, 20]\n",
    "score_list = []\n",
    "\n",
    "# The evaluator does not depend on the depth, so build it once outside the loop.\n",
    "evaluator = MulticlassClassificationEvaluator(\n",
    "    labelCol=\"Severity\", predictionCol=\"prediction\", metricName=\"accuracy\")\n",
    "\n",
    "for depth in max_depth:\n",
    "    dt = DecisionTreeClassifier(labelCol=\"Severity\", featuresCol=\"scaled_features_vector\", maxDepth=depth)\n",
    "\n",
    "    # Single-stage pipeline: the tree is the only stage (no indexers are chained).\n",
    "    pipeline = Pipeline(stages=[dt])\n",
    "\n",
    "    # Fit on the training split.\n",
    "    model = pipeline.fit(trainingData)\n",
    "\n",
    "    # Score the held-out test split.\n",
    "    predictions = model.transform(testData)\n",
    "\n",
    "    # Show a few example rows for this depth.\n",
    "    predictions.select(\"prediction\", \"Severity\", \"scaled_features_vector\").show(5)\n",
    "\n",
    "    # Record the test accuracy for this depth.\n",
    "    accuracy = evaluator.evaluate(predictions)\n",
    "    score_list.append(accuracy)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3dd3yV9fn/8ddFGGGvBGSFHaYiGGYVrBNxtraKA8Q9qlZ/2larX8XRb9V+rdpqLQ5EKYqjDrS4qLuKEBBl770hhB2yrt8f9x17jAkcIDnnJHk/H4/z4Jz787nPfeXkcF+57/u6Px9zd0RERBJNtXgHICIiUhIlKBERSUhKUCIikpCUoEREJCEpQYmISEJSghIRkYSkBCUSZ2bmZtYp3nGIJBolKEk4ZrbCzHLNLKXY8m/CnXm7cty2m9luM9tlZlvN7N9mdn4Zvv8nZnZFWb3f4TCzeuHP+W68YxEpiRKUJKrlwAVFL8zsSKBOjLbdy93rAV2AccDjZnZ3jLYdS+cC+4CTzeyIWG7YzKrHcntSMSlBSaIaD4yMeH0J8EJkBzM7PTyq2mFmq81sdETb+Wa23MwahK9PM7MNZpYabQDuvsXdxwPXArebWdPwvRqa2bNmtt7M1prZ/WaWFLaNMrP/mNnjZrbdzBaY2Ylh2x+A4wgS3i4zezxicyeZ2WIzyzazJ8zMisdjZi3NbK+ZNYlY1tvMtphZDTPrZGafhtvdYmYvH+BHvAT4O/AdcHGxbR1rZl+G8aw2s1Hh8tpm9rCZrQy380W47HgzW1PsPVaY2Unh89Fm9pqZ/cPMdgCjzKyfmX0VbmN9+JnVjFi/h5l9aGZZZrbRzH5vZkeY2Z6i30XYr4+ZbTazGgf4eaWicXc99EioB7ACOAlYCHQDkoA1QFvAgXZhv+OBIwn+0DoK2AicE/E+EwiOgJoC64Azoti2A52KLasB5AOnha/fAMYAdYFmwDTg6rBtVNj35nC984HtQJOw/RPgihK2+Q7QCEgDNgNDS4nvI+DKiNd/Av4ePn8JuCP8PJKBY/fzc7YFCoHuwC3Ad8XadhIcwdYIP7+jw7Ynwp+hVfh7GQTUCn8Xa0r6PYbPRwN5wDlhfLWBY4ABQHWgHTAfuCnsXx9YH8aWHL7uH7ZNBq6N2M4jwF/j/b3Vo+wfOoKSRFZ0FHUywc5rbWSju3/i7rPdvdDdvyPYQQ+J6PIr4ASCHerb7v7OoQTh7nnAFqCJmTUHhhHsSHe7+yaCHeTwiFU2AY+6e567v0yQaE8/wGYecPdsd18FfAwcXUq/FwlPfYZHWcPDZRAkgLZAS3fPcfcv9rO9EQRJaR4wEehhZr3DtguBKe7+UvgzbHX3WWZWDbgM+LW7r3X3Anf/0t33HeBnK/KVu78Z/r72uvsMd5/q7vnuvoIg6Rf9/s4ANrj7w+HPstPdvw7bnic84guPXC8g+K5IJaMEJYlsPMHOchTFTu8BmFl/M/s4PL2zHbgG+L6wwt2zgVeBnsDDhxpEeOooFcgiSAA1gPXhqalsgh1rs4hV1rp75CjMK4GWB9jMhojne4B6pfT7JzDQzFoAgwmOgj4P234LGDDNzOaa2WX72d5IgiNM3H0t8CnBKT+ANsDSEtZJITiaKaktGqsjX5hZupm9E5563QH8L//9/ZUWA8BbQHcza0/wx8t2d592iDFJAlOCkoTl7isJiiWGAa+X0OVFYBLQxt0bElxP+f7ajZkdTfAX/0vAXw4jlLMJTttNI9jJ7gNS3L1R+Gjg7j0i+rcqdg0pjeAUIwSn8w6Zu28DPiA4dXghMLEoGbr7Bne/0t1bAlcDfyupfN3MBgGdCa6rbTCzDUB/4MKweGE10LGEzW8Bckpp201EEUt4ZFP8el/xn/1JYAHQ2d0bAL/nv7+/1UCHUj6DHOAVgqOoEejoqdJSgpJEdzlwgrvvLqGtPpDl7jlm1o9ghw2A
mSUD/yDY6V1KkDSuO5gNm1kTM7uI4LrLg+GprvUECeJhM2tgZtXMrKOZRZ5abAbcGBYu/JLgOtrksG0jpex4D8KLBEdAv+C/p/cws1+aWevw5TaChFBYwvqXAB8SXH86Onz0JLgudBrBkdVJZnaemVU3s6ZmdrS7FwJjgT+HBRtJZjbQzGoBi4BkCwpXagB3Elyb2p/6wA5gl5l1JShGKfIO0MLMbjKzWmZW38z6R7S/QHBkfRZKUJWWEpQkNHdf6u6ZpTRfB9xrZjuBuwj+qi7yR2C1uz8ZXiO5GLjfzDpHsdlvzWwXsAS4ArjZ3e+KaB8J1ATmESSC14AWEe1fExyhbAH+APzC3beGbY8BvzCzbWZ2qEd1k8L33+Du30Ys7wt8HcY+ieBa0bLIFcPEfR5BUcGGiMdygh39JeF1sGEEBQpZwCygV/gWtwKzgelh24NANXffTvD7eIbgWuFugsKW/bmV4I+KncDTwPdVh+6+k+D03ZkEpz8XAz+NaP8PQfKdGR5pSyVkPzxVLiKHIyzHvsLdj413LJWdmX0EvOjuz8Q7FikfullORCocM+sL9CG4PiiVlE7xSZViZseFN8n+6BHv2CQ6ZvY8MIWg1H9nvOOR8qNTfCIikpB0BCUiIgkpZtegzGwoQQVTEvCMuz9QrD2N4A7xRmGf29x9cth2FMHNkA0IKnf6hvdClCglJcXbtWtXHj+GiIiUsRkzZmxx9x+NkxmTU3zhTXuLCMpG1xCUqF4QDrNS1Ocp4Bt3f9LMugOT3b1deOPgTGCEu38bDhKZ7e4FpW0vIyPDMzNLq0wWEZFEYmYz3D2j+PJYneLrByxx92Xunksw9lfx6hsnOEICaMh/77w/hWDMsG8BwpslS01OIiJSOcQqQbXih+NwrQmXRRoNXBwO2T8ZuCFcng64mb1vZjPN7LclbcDMrjKzTDPL3Lx5c9lGLyIiMZdIRRIXAOPcvTXBXezjw9GTqwPHAheF//7Mwvl1Irn7U+6e4e4ZqalRT/kjIiIJKlYJai3B6MRFWlNs6gSCMddeAXD3rwhGTU4hONr6zIPJ4/YQHF31KfeIRUQkrmKVoKYDnc2sfThj5nCCscIirQKKZh7tRpCgNgPvA0eaWZ2wYGIIwRhoIiJSicWkzNzd883seoJkkwSMdfe5ZnYvkOnukwgGpnzazG4mKJgYFU4jsM3M/kyQ5Jyguu9fsYhbRETip1KOJKEycxGRiqO0MnMNFisiIgdtXfZePlu0mbyCQkYMbFcu21CCEhGRA8rJK2Da8iw+XbSZzxZtZvGmYHzlXq0bKkGJiEjsuDtLN+/ms0Wb+XTRZr5evpWcvEJqVq9G//ZNOC+jDUO6pNK5Wb1yi0EJSkREANiZk8d/lmzls8Wb+XThZtZm7wWgQ0pdhvdNY0iXVAa0b0rtmkkxiUcJSkSkiiosdOat38Gni4KENHPVNvILnbo1k/hJpxSuPb4jQ9JTadOkTlziU4ISEalCtuzax+eLN/PZoi18tmgzW3fnAtCjZQOuGtyBwemp9ElrTM3q8R9oSAlKRKQSyyso5JtV2Xy6aBOfLdrC7LXbAWhStybHdU5hSHoqx3VOJbV+rThH+mNKUCIilcyabXv4bNEWPl20iS+XbGXnvnySqhl90hpx6ynpDE5PpWfLhlSrZvEOdb+UoEREKricvAKmLtv6fQn40s27AWjZMJkzerVgSHoqAzum0LB2jThHenCUoEREKhh3Z8mmXUFxw6LNTFuexb78QmpVr0b/Dk25oF8ax3dJpWNqPcwS+yhpf5SgREQqgO178/hyyZbvS8DXbc8BoGNqXS7q35YhXVLp374JyTViUwIeC0pQIiIJqLDQmbNuO58uDI6SvlmdTUGhU79WdX7SKYXrT0hlcHoKrRvHpwQ8FpSgREQSxKadOXy+KDhK+nzxFrLCEvAjWzXk2iEdGZyeSu+0RtRIin8JeCwoQYmIxElufiEzV237vrhh7rodAKTUq8mQ
9FSGpKdybOcUUuolXgl4LChBiYjE0OqsPd8XN3y5ZAu7cwuoXs3o07Yxvzm1C0PSU+neokHCl4DHghKUiEg52pv7wxLwZVuCEvBWjWpzdu9WDElPZVDHptRPrlgl4LGgBCUiUobcncWbdn1f3DBtRRa5+YUk16jGgA5NGTGwLYPTU+mQUrdCl4DHghKUiMhh2r4njy+WbPl+aooNO4IS8M7N6jFyQFAC3rdd5SoBjwUlKBGRg1RQ6MxeW1QCvolZq7MpdKifXJ3jOqcwuHMqg9NTadmodrxDrdCUoEREorBpR05wHWnxFj5fvJnsPXmYwVGtGnL9TzsxpEsqvVo3onoVKQGPBSUoEZES5OYXkrmyaIrzLcxfX1QCXosTujb7fhTwJnVrxjnSyksJSkQktHLrf6c4/3LpVvbkFlAjyTimbWN+N7Qrg9NT6HaESsBjRQlKRKqs3fvyf1ACvmLrHgDaNKnNz/u0Ykh6MwZ2bEq9WtpVxoM+dRGpMtydhRt3fl8CnrliG7kFhdSukcTAjk0ZNagdQ7o0o13TOioBTwBKUCJSqWXvyeXzxUEJ+GeLN7Nxxz4Auh5Rn1E/aceQ9FQy2jWmVnWVgCcaJSgRqVQKCp1v12R/f5T03ZqgBLxh7Roc2zmFIWEJ+BENk+MdqhyAEpSIVArb9+Tx6L8X8frMtWzfG5SA92rdiBtO6Px9CXiSihsqFCUoEanQCgqdidNX8X/vL2T73jzO7NWSk7o159hOKTRWCXiFpgQlIhXW9BVZ3P3WXOat30G/9k0YfWYPurdsEO+wpIzE7JZnMxtqZgvNbImZ3VZCe5qZfWxm35jZd2Y2rIT2XWZ2a6xiFpHEtH77Xm586Rt++fevyN6Ty+MX9ublqwYoOVUyMTmCMrMk4AngZGANMN3MJrn7vIhudwKvuPuTZtYdmAy0i2j/M/BuLOIVkcSUk1fAM58v44mPl1Lgzo0ndubaIR2pXVMVeJVRrE7x9QOWuPsyADObCJwNRCYoB4r+/GkIrCtqMLNzgOXA7phEKyIJxd35YN5G7v/XPFZn7WVojyO44/RutGlSJ96hSTmKVYJqBayOeL0G6F+sz2jgAzO7AagLnARgZvWA3xEcfZV6es/MrgKuAkhLSyuruEUkzpZs2sk9b8/j88VbSG9ejwlX9OcnnVLiHZbEQCIVSVwAjHP3h81sIDDezHoSJK5H3H3X/u7sdvengKcAMjIyPAbxikg52r43j8emLOaFr1ZQp2YSd5/ZnYsHtKWGRguvMmKVoNYCbSJetw6XRbocGArg7l+ZWTKQQnCk9QszewhoBBSaWY67P17+YYtIrBUWOq/OWM1D7y0ka08uw/umcesp6TStVyveoUmMxSpBTQc6m1l7gsQ0HLiwWJ9VwInAODPrBiQDm939uKIOZjYa2KXkJFI5zViZxehJ85i9djsZbRvz/Fn96NmqYbzDkjiJSYJy93wzux54H0gCxrr7XDO7F8h090nALcDTZnYzQcHEKHfXqTqRKmDjjhweeHcBb3yzliMaJPPY8KM5q1dLDdhaxVllzAEZGRmemZkZ7zBE5AD25Rcw9osV/PWjxeQXOFcObs91x3eirqa3qFLMbIa7ZxRfrm+BiMScu/PRgk3c9848Vmzdw8ndm3Pn6d1o27RuvEOTBKIEJSIxtXTzLu57Zx6fLNxMx9S6vHBZPwanp8Y7LElASlAiEhM7c/L460dLGPvFcmrXSOLO07txyaB2KhuXUilBiUi5Kix0/jlzDQ++t5Ctu/dx3jFtuPXULqTWV9m47J8SlIiUm1mrs7l70ly+XZ1N77RGPHtJBr3aNIp3WFJBKEGJSJnbtDOHh95byGsz1pBavxZ/Pq8X5xzdimqaMFAOghKUiJSZ3PxCxn25nL/8ewn78gu4ZkhHrj+hE/VUNi6HQN8aESkTHy/cxH1vz2PZlt2c0LUZ/3NGd9qnqGxcDp0SlIgclhVbdnPfO/P494JNtE+py3Oj+vLT
rs3iHZZUAkpQInJIdu3L5/GwbLxGknH7aV259CftqVldZeNSNpSgROSguDtvzlrLHycvYNPOfZzbpzW/G9qFZg2S4x2aVDJKUCISte/WZDN60lxmrsqmV+uGjBlxDL3TGsc7LKmklKBE5IC27NrHn95byCszVtO0bk0e+sVR/KJPa5WNS7lSghKRUuUVFPLCVyt5dMoi9uYWcMWx7bnhxM40SK4R79CkClCCEpESfb54M/e8PY8lm3YxJD2V/zmjO52a1Yt3WFKFKEGJyA+s2rqH+/81jw/mbaRt0zo8e0kGJ3RtpskDJeaUoEQEgD25+fzt46U89fkyqlczfju0C5cf255a1ZPiHZpUUUpQIlWcuzPp23X8cfICNuzI4We9W3HbaV1prrJxiTMlKJEqbM7a7dzz9lymr9jGka0a8sRFvTmmbZN4hyUCKEGJVElZu3P5vw8W8tK0VTSpU5MHfn4kv8xoQ5LKxiWBKEGJVCH5BYX8Y+pK/vzhInbnFnDpoPb8+qTONKytsnFJPEpQIlXEl0u2cM/b81i4cSfHdkrh7jO707l5/XiHJVIqJSiRSm511h7+d/J83p2zgTZNajNmxDGc0r25ysYl4SlBiVRSe3MLePLTpYz5dCnVzLj1lHSuOK4DyTVUNi4VgxKUSCXj7kyevYE//Gse67bncFavltw+rCstGtaOd2giB0UJSqQSmb9+B6MnzeXr5Vl0a9GAR4f3pl97lY1LxaQEJVIJbNudy58/XMSEr1fSsHYN/vCzngzvm6aycanQlKBEKrCCQufFaat4+IOF7MzJZ+TAdtx0Umca1akZ79BEDpsSlEgFNXXZVkZPmsuCDTsZ2KEpd5/Vna5HNIh3WCJlplqsNmRmQ81soZktMbPbSmhPM7OPzewbM/vOzIaFy082sxlmNjv894RYxSySiNZl7+X6F2cy/Kmp7MzJ58mL+vDilf2VnKTSickRlJklAU8AJwNrgOlmNsnd50V0uxN4xd2fNLPuwGSgHbAFONPd15lZT+B9oFUs4hZJJDl5BYz5dBlPfroEd7jppM5cPbgjtWuqbFwqp1id4usHLHH3ZQBmNhE4G4hMUA4U/QnYEFgH4O7fRPSZC9Q2s1ruvq/coxZJAO7Oe3M2cP+/5rM2ey+nH9mC24d1pXXjOvEOTaRcxSpBtQJWR7xeA/Qv1mc08IGZ3QDUBU4q4X3OBWaWlJzM7CrgKoC0tLQyCFkk/hZu2Mk9b8/ly6Vb6XpEfV66cgADOzaNd1giMZFIRRIXAOPc/WEzGwiMN7Oe7l4IYGY9gAeBU0pa2d2fAp4CyMjI8BjFLFIutu/J45Epixg/dSX1alXn3rN7cGG/NKonxeyysUjcxSpBrQXaRLxuHS6LdDkwFMDdvzKzZCAF2GRmrYE3gJHuvjQG8YrERUGh8/L01fzp/QVs35vHhf3TuOXkLjSuq7JxqXpilaCmA53NrD1BYhoOXFiszyrgRGCcmXUDkoHNZtYI+Bdwm7v/J0bxisTc9BVZjJ40l7nrdtCvfRNGn9mD7i1VmSdVV0wSlLvnm9n1BBV4ScBYd59rZvcCme4+CbgFeNrMbiYomBjl7h6u1wm4y8zuCt/yFHffFIvYRcrb+u17+ePkBUz6dh0tGibz1wt6c8ZRLTTauFR55n7gyzVm1svdv41BPGUiIyPDMzMz4x2GyH7l5BXw7BfLefyjJRS4c83gDlxzfEfq1EykS8Mi5c/MZrh7RvHl0f5PmGJm64DxwAR3X1+m0YlUIe7Oh/M2cv+/5rMqaw9DexzBHad3o00TlY2LRIo2QbUATgcuBkab2ZfAC8Dr7r6nvIITqWyWbNrJPW/P4/PFW+jcrB7/uLw/x3ZOiXdYIgkpqgTl7vnAW8BbZtYQ+CXwW+BJM3sDGKMCBpHS7cjJ47Epi3n+yxXUrpnE3Wd25+IBbamhsnGRUh3UyW4zqwecQ1CF1xqYSFB9N8HM/uXuvyr7EEUqrsJC59UZq3no
vYVk7clleN80bj0lnab1asU7NJGEF1WCMrPTgRHAacB/gGeAN909J2x/giBRKUGJhHbvy+faCTP5bNFmjmnbmOfP6kfPVg3jHZZIhRHtEdQDBNecbi6pQMLds8zspjKNTKQC27prH5eNm86cdTu4/5yeXNQ/TWXjIgcp2mtQR0bR55nDD0ek4ludtYeRY6exLnsvYy4+hpO6N493SCIVUlRXaM3sdTM7rtiy48zstfIJS6Rimr9+B+c++SVZu3OZcEV/JSeRwxBtCdEQ4Mtiy74Cflq24YhUXFOXbeW8v39FNTNevWYgGe2axDskkQot2gSVQzAFRqR6QF7ZhiNSMb03Zz0jx06jecNk/nndINKb1493SCIVXrQJ6n1gjJk1AAj/fRx4r7wCE6koJny9kusmzKRHywa8evVAWjWqHe+QRCqFaBPULQSz3WaZ2SYgi2DWW1XuSZXl7jw2ZTF3vDGH47s0Y8IV/TUthkgZiraKbxtwupm1ILhBd7W7byjXyEQSWEGhc/ekOfxj6irO7dOaB849UqNCiJSxgxpJwt3Xm9kGwMysWrissFwiE0lQOXkF3PzyLN6ds4FrhnTkd0O76B4nkXIQ7UgSLYEngMFAo2LNSWUdlEii2pGTx1UvZDJ1WRZ3nt6NK47rEO+QRCqtaM9JjAFyCWa83QX0ASYB15RTXCIJZ9OOHM4fM5XMFdt49PyjlZxEylm0p/gGAWnuvtvM3N2/NbPLCe6Nerr8whNJDMu37Gbk2K/ZuiuXsaP6Mjg9Nd4hiVR60SaoAiA/fJ5tZqnADqBVuUQlkkBmr9nOqOem4cBLVw6gV5viZ7lFpDxEe4rva2BY+Px94GXgdUDzqkul9vnizQx/6iuSayTx2jUDlZxEYijaI6gR/DeZ3URwX1R94NHyCEokEUz6dh23vDKLjqn1eP6yfjRvkBzvkESqlAMmKDNLAh4DrgJw973A/eUcl0hcPfef5dzz9jz6tW/C0yMzaFi7RrxDEqlyDpig3L3AzE4BdL+TVHruzp/eX8jfPlnKqT2a89jw3iTX0J0UIvEQ7TWoR4B7zEx/RkqllV9QyO/++R1/+2QpF/RL428XHaPkJBJH0V6DugE4Avh/ZrYZ8KIGd08rj8BEYmlvbgHXvziTfy/YxI0ndubmkzprdAiROIs2QV1crlGIxFH2nlwufz6Tmau2cd85PRkxoG28QxIRoh8s9tPyDkQkHtZv38vIZ6excusenriwD8OObBHvkEQkFO1YfPeW1ubud5VdOCKxs2TTTkY+O42dOfmMu6wvgzqmxDskEYkQ7Sm+NsVeH0EwDfwbZRuOSGzMWLmNy5+fTvVq1Zh49QB6tGwY75BEpJioqvjc/dJij9OAn/Pf4Y8OyMyGmtlCM1tiZreV0J5mZh+b2Tdm9p2ZDYtouz1cb6GZnRrtNkVK8tGCjVz0zFQa1a7B69cOUnISSVAHNR9UMR8QDHl0QOHNvk8AJwNrgOlmNsnd50V0uxN4xd2fNLPuwGSgXfh8ONADaAlMMbN0dy84jNilinptxhp+98/v6N6iAc9d2peUerXiHZKIlCLaa1DF5xWoA1wIrI5yO/2AJe6+LHy/icDZQGSCcoJp5SGYTn5d+PxsYKK77wOWm9mS8P2+inLbIrg7Yz5bxgPvLuDYTin8fcQx1Kt1OH+fiUh5i/Z/6BKCBFJ0Y8ge4BvgkijXb8UPk9kaoH+xPqOBD8zsBqAucFLEulOLratR1CVqhYXOHybP59kvlnNmr5Y8/Mte1Kyu6dlFEl20Zeax+N98ATDO3R82s4HAeDPrGe3KZnYV4XiBaWm6d1gCufmF/Oa1b3lr1jpGDWrHXWd0p1o13YArUhFElXjM7Ggza1NsWRsz6xXldtbyw0rA1uGySJcDrwC4+1dAMpAS5bq4+1PunuHuGampmkxOYPe+fC5/fjpvzVrHb4d24e4zlZxEKpJoj4z+ARQfh68mMD7K9acDnc2svZnVJCh6mFSs
zyqCKeUxs24ECWpz2G+4mdUys/ZAZ2BalNuVKmrrrn1c+PRUvly6lYfOPYrrju+koYtEKphor0GlFRU4FHH3pWbWLpqV3T3fzK4nmOwwCRjr7nPDG4Az3X0SwRxTT5vZzQTXu0a5uwNzzewVgoKKfOBXquCT/VmdtYeRY6exLnsvYy4+hpO6N493SCJyCCzIAQfoZDYPuNjdZ0Ys6wO86O5dyzG+Q5KRkeGZmZrstyqav34Hl4ydxr78Qp69JIOMdk3iHZKIHICZzXD3jOLLoz2CegR4y8weApYCHYFbgT+UXYgih2fqsq1c+UImdWtW59VrBpLevH68QxKRwxBtFd/TZpZNUMjQhqBk/BZ3f608gxOJ1ntzNnDjxG9o07g2L1zen1aNasc7JBE5TFHfqejurwKvlmMsIodkwtcr+Z8359CrTSPGXtKXxnVrxjskESkD0ZaZ/8XMBhVbNsjMHi2fsEQOzN15bMpi7nhjDkPSU5lwRX8lJ5FKJNoy8wuA4lUHMwiGOxKJuYJC53/emsMjUxZxbp/WPDUygzo1NXSRSGUS7f9o58fJLKmEZSLlLievgJtfnsW7czZw9ZAO3Da0q+5xEqmEok0wnwP3m1k1gPDfe8LlIjGzIyePUc9N4905G7jz9G7cflo3JSeRSiraI6hfA+8A681sJdCWYLTxM8srMJHiNu3I4ZLnprN4404ePf9ozumtMYNFKrNoy8zXhDfm9iMoM98InEMw5FDL8gtPJLB8y25Gjv2arbtyeXZUX4aka7xFkcruYK4qNyWYImMUcBTB6b1fl0NMIj8we812Rj03DQdevHIAR7dpFO+QRCQG9pugzKwGcBZBUjqVYF6ol4A04Dx331TeAUrV9sXiLVw9PpNGdWoy/vJ+dEitF++QRCRGDlQksREYAywEBrh7d3e/D8gt98ikypv07TouHTeNNk3q8Pp1g5ScRKqYAyWo74BGBKf2+ppZ4/IPSQSe+89ybnzpG3qnNeblqwfSvEFyvEMSkRjbb4Jy9+MJBob9gGBw2A1m9jbBlOzF54cSOWzuzkPvLeCet+dxao/mvHBZPxrW1ldNpCo64H1Q7r7S3e9z984EEwquBwqBb8PRzUXKRH5BIb/753f87ZOlXABk1P0AABMwSURBVNAvjb9ddAzJNZLiHZaIxMlBjQ3j7l8AX5jZjcDPgJHlEpVUOXtzC7jhpZlMmb+JG0/szM0nddYNuCJV3CENXubuOQTVfC+VbThSFWXvyeWK5zOZsWob953TkxED2sY7JBFJABpdU+Jq/fa9jHx2Giu37uGJC/sw7MgW8Q5JRBKEEpTEzZJNOxn57DR25uQz7rK+DOqYEu+QRCSBKEFJXMxYuY3Ln59O9WrVmHj1AHq0bBjvkEQkwShBScx9tGAj102YyRENknnhsv6kNa0T75BEJAEpQUlMvTZjDb/753d0b9GA5y7tS0q9WvEOSUQSlBKUxIS7M+azZTzw7gJ+0qkpY0ZkUK+Wvn4iUjrtIaTcFRY6f5g8n2e/WM4ZR7Xg4fN6Uau6bsAVkf1TgpJylZtfyG9e+5a3Zq1j1KB23HVGd6pV0w24InJgSlBSbnbvy+eaf8zg88Vb+M2pXbju+I4aHUJEoqYEJeVi6659XDZuOrPXbuehc4/ivL5t4h2SiFQwSlBS5lZn7WHk2Gmsy97LmBEZnNy9ebxDEpEKSAlKytT89Tu4ZOw0cvIKmHBFfzLaNYl3SCJSQR1wuo2yYmZDzWyhmS0xs9tKaH/EzGaFj0Vmlh3R9pCZzTWz+Wb2F9OFjIT09bKtnDfmK6qZ8dq1g5ScROSwxOQIysySgCeAk4E1wHQzm+Tu84r6uPvNEf1vAHqHzwcBPwGOCpu/AIYAn8QidonOe3M2cOPEb2jTuDYvXN6fVo1qxzskEangYnUE1Q9Y4u7L3D0XmAicvZ/+F/DfqTwcSAZqArUIZvLdWI6xykF68etVXDdhBj1aNuC1awYpOYlImYhV
gmoFrI54vSZc9iNm1hZoD3wE4O5fAR8TzOS7Hnjf3eeXsN5VZpZpZpmbN28u4/ClJO7OY1MW8/s3ZjMkPZUJV/Sncd2a8Q5LRCqJmF2DOgjDgdfcvQDAzDoB3YDWBEntBDM7rvhK7v6Uu2e4e0ZqampMA66KCgqdu96ayyNTFnFun9Y8NTKDOjVVcyMiZSdWCWotEHkjTOtwWUmG88OZen8GTHX3Xe6+C3gXGFguUUpUcvKC6dnHT13J1UM68H+/PIoaSYn4t46IVGSx2qtMBzqbWXszq0mQhCYV72RmXYHGwFcRi1cBQ8ysupnVICiQ+NEpPomNHTl5jHpuGpNnb+DO07tx+2ndNDqEiJSLmJyTcfd8M7seeB9IAsa6+1wzuxfIdPeiZDUcmOjuHrH6a8AJwGyCgon33P3tWMQtP7RpZw6jxk5n0cadPHr+0ZzTu8TLiCIiZcJ+mAsqh4yMDM/MzIx3GJXKii27GTH2a7buyuXJi49hSLqu84lI2TCzGe6eUXy5rmrLAc1es51Rz03DgRevHMDRbRrFOyQRqQKUoGS/vli8havHZ9KoTk3GX96PDqn14h2SiFQRSlBSqknfruOWV2bRMbUez1/Wj+YNkuMdkohUIUpQUqLn/rOce96eR7/2TXh6ZAYNa9eId0giUsUoQckPuDt/en8hf/tkKaf2aM5jw3uTXEPTs4tI7ClByffyCwr5/RuzeSVzDRf0S+P+c3qSpOnZRSROlKAEgL25wegQU+Zv4sYTO3PzSZ11A66IxJUSlJC9J5crns9kxqpt3Hd2D0YMbBfvkERElKCquvXb93LJ2Gms2LKHJy7sw7AjW8Q7JBERQAmqSluyaScjn53Gjpx8xl3Wl0EdU+IdkojI95SgqqiZq7Zx2bjpVK9WjYlXDaBnq4bxDklE5AeUoKqgjxds4toJM2jeIJnxl/UnrWmdeIckIvIjSlBVSH5BIeO+XMEf311Atxb1eW5UP1Lr14p3WCIiJVKCqiJmrc7mjjdmM3fdDk7o2oy/XNCberX06xeRxKU9VCW3fW8ef3p/ARO+XkVqvVo8fmFvTj+yhe5xEpGEpwRVSbk7b81ax/3/mk/W7n1cMrAdt5ySTv1kjaknIhWDElQltHTzLv7nzTl8uXQrvVo3ZNylfVWlJyIVjhJUJZKTV8ATHy9hzKfLqFWjGved05ML+6VpPD0RqZCUoCqJTxZu4q635rIqaw/nHN2S35/ejWb1NX+TiFRcSlAV3IbtOdz7zlwmz95Ah5S6vHhFfwZ10ogQIlLxKUFVUPkFhbzw1Uoe/mAheYXOLSenc9WQDtSqrrmbRKRyUIKqgL5ZtY073pjDvPU7GJKeyr1n96Bt07rxDktEpEwpQVUg2/fk8dD7C3hx2iqa1a/F3y7qw2k9j9A9TSJSKSlBVQDuzhvfrOV/J88na3culw5qz80nd9Y9TSJSqSlBJbglm3Zx55uzmbosi6PbNGLcpf10T5OIVAlKUAkqJ6+Axz9awpjPllK7RhJ/+FlPLuibRjXd0yQiVYQSVAL6eMEm7po0h9VZe/l571bcPqybRh0XkSpHCSqBrN++l3vfnse7czbQMbUuL17ZX7PcikiVpQSVAIrmaXrkw0XkFzq/ObULVx7XgZrVq8U7NBGRuIlZgjKzocBjQBLwjLs/UKz9EeCn4cs6QDN3bxS2pQHPAG0AB4a5+4oYhV6uZqzcxp1vzmH++h38tEsq95zVUzPciogQowRlZknAE8DJwBpguplNcvd5RX3c/eaI/jcAvSPe4gXgD+7+oZnVAwpjEXd5yt6Ty4PvLeSlaas4okEyf7+4D6f20D1NIiJFYnUE1Q9Y4u7LAMxsInA2MK+U/hcAd4d9uwPV3f1DAHffVf7hlh935/WZwT1N2XvzuOLY9tx0crpmtxURKSZWe8VWwOqI12uA/iV1NLO2QHvgo3BROpBtZq+Hy6cAt7l7QbH1rgKuAkhLSyvT4MvK4o07ufPNOXy9
PIveaY0Yf86RdG/ZIN5hiYgkpET8s3048FpEAqoOHEdwym8V8DIwCng2ciV3fwp4CiAjI8NjFWw09uYW8NePFvPUZ8uoW6s6f/z5kZyf0Ub3NImI7EesEtRaggKHIq3DZSUZDvwq4vUaYFbE6cE3gQEUS1CJ6qMFG7nrrbms2baXc/u05vZhXUmpp3uaREQOJFYJajrQ2czaEySm4cCFxTuZWVegMfBVsXUbmVmqu28GTgAyyz/kw7Muey/3vD2X9+dupFOzeky8agADOjSNd1giIhVGTBKUu+eb2fXA+wRl5mPdfa6Z3QtkuvuksOtwYKK7e8S6BWZ2K/BvC0rcZgBPxyLuQ5FXUMi4/6zgkSmLKHTnt0O7cMWxuqdJRORgWUQuqDQyMjI8MzP2B1kzVmZxxxtzWLBhJyd2bcbos3rQponuaRIR2R8zm+HuGcWXJ2KRRIWzbXcuD763gInTV9OiYTJjRhzDKd2b654mEZHDoAR1GNyd12as4Y/vLmD73jyuGtyBX5/Ymbq6p0lE5LBpT3qIFm3cyZ1vzGHaiiyOaduY+8/pSbcWuqdJRKSsKEEdpD25+fzl30t45vNl1EuuzoPnHskvj9E9TSIiZU0J6iBMmbeRuyfNZW32Xn5xTGtuP60rTXVPk4hIuVCCisLa7L2MnjSXD+dtJL15PV65eiD92jeJd1giIpWaEtR+5BUUMvaL5Tw6ZTGO87uhXbn82Pa6p0lEJAaUoEqRuSK4p2nhxp2c1K0Zd5+pe5pERGJJCaoYd+eON+fw4teraNkwmadGHMMpPY6Id1giIlWOElQxZkaD5BpcPbgDN+qeJhGRuNHetwS3ndY13iGIiFR5utovIiIJSQlKREQSkhKUiIgkJCUoERFJSEpQIiKSkJSgREQkISlBiYhIQlKCEhGRhGTuHu8YypyZbQZWxmBTKcCWGGynrCnu2FLcsaW4Y6ss4m7r7qnFF1bKBBUrZpbp7hnxjuNgKe7YUtyxpbhjqzzj1ik+ERFJSEpQIiKSkJSgDs9T8Q7gECnu2FLcsaW4Y6vc4tY1KBERSUg6ghIRkYSkBCUiIglJCeoAzKyNmX1sZvPMbK6Z/bqEPseb2XYzmxU+7opHrMWZ2Qozmx3GlFlCu5nZX8xsiZl9Z2Z94hFnsZi6RHyOs8xsh5ndVKxPQnzeZjbWzDaZ2ZyIZU3M7EMzWxz+27iUdS8J+yw2s0tiF3Wpcf/JzBaE34M3zKxRKevu9ztVnkqJe7SZrY34LgwrZd2hZrYw/K7fFruoS4375YiYV5jZrFLWjefnXeK+L6bfcXfXYz8PoAXQJ3xeH1gEdC/W53jgnXjHWkLsK4CU/bQPA94FDBgAfB3vmIvFlwRsILiJL+E+b2Aw0AeYE7HsIeC28PltwIMlrNcEWBb+2zh83jjOcZ8CVA+fP1hS3NF8p+IQ92jg1ii+R0uBDkBN4Nvi/4djHXex9oeBuxLw8y5x3xfL77iOoA7A3de7+8zw+U5gPtAqvlGVmbOBFzwwFWhkZi3iHVSEE4Gl7h6LUUEOmrt/BmQVW3w28Hz4/HngnBJWPRX40N2z3H0b8CEwtNwCLaakuN39A3fPD19OBVrHKp5olfJ5R6MfsMTdl7l7LjCR4PcUE/uL28wMOA94KVbxRGs/+76YfceVoA6CmbUDegNfl9A80My+NbN3zaxHTAMrnQMfmNkMM7uqhPZWwOqI12tIrOQ7nNL/4ybi5w3Q3N3Xh883AM1L6JPon/tlBEfWJTnQdyoerg9PTY4t5XRTIn/exwEb3X1xKe0J8XkX2/fF7DuuBBUlM6sH/BO4yd13FGueSXAaqhfwV+DNWMdXimPdvQ9wGvArMxsc74CiZWY1gbOAV0toTtTP+wc8ONdRoe7jMLM7gHxgQildEu079STQETgaWE9wuqwiuYD9Hz3F/fPe376vvL/jSlBRMLMaBL+gCe7+evF2d9/h7rvC55OBGmaWEuMwf8Td14b/bgLeIDjV
EWkt0CbidetwWSI4DZjp7huLNyTq5x3aWHSaNPx3Uwl9EvJzN7NRwBnAReGO50ei+E7FlLtvdPcCdy8Eni4lnkT9vKsDPwdeLq1PvD/vUvZ9MfuOK0EdQHiO+Flgvrv/uZQ+R4T9MLN+BJ/r1thFWWJMdc2sftFzgovgc4p1mwSMDKv5BgDbIw7d463UvywT8fOOMAkoqli6BHirhD7vA6eYWePwlNQp4bK4MbOhwG+Bs9x9Tyl9ovlOxVSxa6Y/o+R4pgOdzax9eGQ+nOD3FG8nAQvcfU1JjfH+vPez74vddzwe1SEV6QEcS3AI+x0wK3wMA64Brgn7XA/MJagOmgoMSoC4O4TxfBvGdke4PDJuA54gqHCaDWTEO+4wrroECadhxLKE+7wJEuh6II/gHPvlQFPg38BiYArQJOybATwTse5lwJLwcWkCxL2E4JpB0Xf872HflsDk/X2n4hz3+PC7+x3BjrNF8bjD18MIqtCWJkLc4fJxRd/piL6J9HmXtu+L2XdcQx2JiEhC0ik+ERFJSEpQIiKSkJSgREQkISlBiYhIQlKCEhGRhKQEJSI/YGbtzMzDG0lF4kYJSuQghNMf5BYfucLMvgl36u3KeHtFyWJX+NhoZu+Y2clluI0VZnZSWb2fSFlRghI5eMsJRroAwMyOBOqU8zYbuXs9oBfByNBvhEMTiVRaSlAiB288MDLi9SXAC5EdzOz08Khqh5mtNrPREW3nm9lyM2sQvj7NzDaYWeqBNuzuG9z9MYJ5kB40s2rhe7Q0s3+a2ebwvW+M2N5oM3vNgknydprZTDPrFbaNB9KAt8MjtN9GbO4iM1tlZlvCQWRFYkoJSuTgTQUamFk3M0siGNvtH8X67CZIYo2A04FrzewcAHd/GfgS+IuZNSUY7+wKd998EDG8DjQDuoRJ6m2CIXFaEcyjdZOZnRrR/2yCkeGbAC8Cb5pZDXcfAawCznT3eu7+UMQ6xwJdwve7y8y6HUR8IodNCUrk0BQdRZ1MMJHbD0ZqdvdP3H22uxe6+3cE47ENiejyK+AE4BPgbXd/5yC3vy78twnQF0h193vdPdfdlxGM7D08ov8Md3/N3fOAPwPJBLMo78897r7X3YvGg+t1kDGKHBZV6YgcmvHAZ0B7ip3eAzCz/sADQE+CacZrETG3lbtnm9mrwP8Dzj2E7RdN/pYFHAm0NLPsiPYk4POI199PHufuhWa2hmBg0v3ZEPF8D1DvEOIUOWQ6ghI5BB5MQ7+cYHTnH80RRnAabRLQxt0bAn8nGD0eADM7mmC055eAvxxCCD8jmIdnIUHyWe7ujSIe9d19WET/7+fmCU8Jtua/R2EaMVoSkhKUyKG7HDjB3XeX0FYfyHL3nHDOqguLGswsmeCa1e+BS4FWZnZdNBs0s+Zmdj1wN3C7BxP1TQN2mtnvzKy2mSWZWU8z6xux6jFm9vPw3qabgH0E19IANhJM7SCSUJSgRA6Ruy9198xSmq8D7jWzncBdwCsRbX8EVrv7k+6+D7gYuN/MOu9nc9lmtptg7qNhwC/dfWwYRwHBTLhHExzVbQGeARpGrP8WcD6wDRgB/Dy8HlUUz51mlm1mt0b544uUO80HJVLJhSXundz94njHInIwdAQlIiIJSQlKREQSkk7xiYhIQtIRlIiIJCQlKBERSUhKUCIikpCUoEREJCEpQYmISEL6/6Cm+yXgxI8nAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "from matplotlib import rcParams\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "plt.plot(max_depth,score_list)\n",
    "plt.xlabel(\"Max Depth\", fontsize=12)\n",
    "plt.ylabel(\"Accuracy\", fontsize=12)\n",
    "plt.title(\"Max_Depth vs Accuracy\")\n",
    "rcParams.update({'figure.autolayout': True})\n",
    "plt.rc('xtick', labelsize=12) \n",
    "plt.rc('ytick', labelsize=12) \n",
    "plt.rc('font', family='serif')\n",
    "plt.tight_layout()\n",
    "plt.savefig('/Users/pprusty05/google_drive/Data_Mining/Project/plots/maxDepth_accuracy_accident.pdf')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Decision tree with max depth 20"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----------+--------+----------------------+\n",
      "|prediction|Severity|scaled_features_vector|\n",
      "+----------+--------+----------------------+\n",
      "|       2.0|     1.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     1.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     2.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     2.0|  (45,[0,1,2,3,4,5,...|\n",
      "|       2.0|     2.0|  (45,[0,1,2,3,4,5,...|\n",
      "+----------+--------+----------------------+\n",
      "only showing top 5 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Train a DecisionTree model.\n",
    "dt = DecisionTreeClassifier(labelCol=\"Severity\", featuresCol=\"scaled_features_vector\", maxDepth=20)\n",
    "\n",
    "# Chain indexers and tree in a Pipeline\n",
    "pipeline = Pipeline(stages=[dt])\n",
    "\n",
    "# Train model.  This also runs the indexers.\n",
    "model = pipeline.fit(trainingData)\n",
    "\n",
    "# Make predictions.\n",
    "predictions = model.transform(testData)\n",
    "\n",
    "# Select example rows to display.\n",
    "predictions.select(\"prediction\", \"Severity\", \"scaled_features_vector\").show(5)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Accuracy: share of rows in predictions whose predicted class equals Severity.\n",
    "evaluator = MulticlassClassificationEvaluator(\n",
    "    metricName=\"accuracy\",\n",
    "    labelCol=\"Severity\",\n",
    "    predictionCol=\"prediction\",\n",
    ")\n",
    "accuracy = evaluator.evaluate(predictions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8563148602871328"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# F1 score of the same predictions (Spark's \"f1\" metric for multiclass evaluation).\n",
    "evaluator = MulticlassClassificationEvaluator(\n",
    "    metricName=\"f1\",\n",
    "    labelCol=\"Severity\",\n",
    "    predictionCol=\"prediction\",\n",
    ")\n",
    "f1_score = evaluator.evaluate(predictions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8548318553467797"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "f1_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Weighted precision of the same predictions (metricName=\"weightedPrecision\").\n",
    "evaluator = MulticlassClassificationEvaluator(\n",
    "    metricName=\"weightedPrecision\",\n",
    "    labelCol=\"Severity\",\n",
    "    predictionCol=\"prediction\",\n",
    ")\n",
    "Precision = evaluator.evaluate(predictions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.85389708857649"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Precision"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Weighted recall of the same predictions (metricName=\"weightedRecall\").\n",
    "# Note: for single-label multiclass data this coincides with accuracy, so the\n",
    "# two numbers are expected to match.\n",
    "evaluator = MulticlassClassificationEvaluator(\n",
    "    metricName=\"weightedRecall\",\n",
    "    labelCol=\"Severity\",\n",
    "    predictionCol=\"prediction\",\n",
    ")\n",
    "Recall = evaluator.evaluate(predictions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8563148602871328"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Recall"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
